/// <summary>
        /// Click handler that trains the current network with the parameters read from the UI
        /// and appends the resulting quantization error to <c>SomInformation</c>.
        /// Any exception raised while training is shown to the user in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="eventArgs">Event data (unused).</param>
        private void StartTrainButton_Click(object sender, RoutedEventArgs eventArgs)
        {
            // Training needs both a network and a set of data points.
            bool ready = Network != null && TrainingData != null;
            if (!ready)
            {
                MessageBox.Show("You didn't create network or get data points!\n");
                return;
            }

            try
            {
                TrainingParameters parameters = GetParameters();

                // An observer is attached only when the network reports two dimensions;
                // otherwise the network's observer is reset to null.
                StandardTrainingObserver observer = null;
                if (Network.Dimensions == 2)
                {
                    observer = new StandardTrainingObserver();
                }
                Network.Observer = observer;

                Network.Train(parameters);

                IErrorCalculator errorCalculator = new QuantizationErrorCalculator()
                {
                    DistanceCalculator = Network.DistanceCalculator
                };
                var quantizationError = errorCalculator.CalculateError(Network.Neurons, TrainingData);
                SomInformation.Text += $"\nSom trained. Quantization error: {quantizationError:F} ";

                if (observer != null)
                {
                    DisplayIfTwoDimensional(observer);
                }
            }
            catch (Exception e)
            {
                // Surface the failure to the user instead of crashing the UI.
                MessageBox.Show(e.Message);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Copies the selected temporary model into the final model location, deletes the
        /// temporary models and returns the model path.
        /// </summary>
        /// <param name="trParams">Training parameters; <c>ModelFinalLocation</c> is the destination folder.</param>
        /// <param name="tpl">Temporary stored model information; <c>Item3</c> is the path of the model file to keep.</param>
        /// <returns>
        /// The path built from <c>MLFactory.m_MLModelFolder</c> and the model file name
        /// (intended to always be a relative path).
        /// </returns>
        private static string saveModel(TrainingParameters trParams, Tuple <double, double, string> tpl)
        {
            var tempModelPath = tpl.Item3;

            //folder that currently holds the temporary models
            var tempModelsDir = Path.GetDirectoryName(tempModelPath);

            //folder for final model; CreateDirectory does nothing when the folder already exists
            var finDir = trParams.ModelFinalLocation;
            Directory.CreateDirectory(finDir);

            //copy best model to final location
            var fName    = Path.GetFileName(tempModelPath);
            var fullPath = Path.Combine(finDir, fName);

            //overwrite, so that a model with the same file name left by a previous run
            //does not make the copy fail with an IOException
            File.Copy(tempModelPath, fullPath, true);

            //delete the content of the temporary models folder
            MLFactory.DeleteAllFiles(tempModelsDir);

            //return always relative path
            return $"{MLFactory.m_MLModelFolder}\\{fName}";
        }
        /// <summary>
        /// Builds the training parameters from the values entered in the UI.
        /// </summary>
        /// <returns>
        /// A <see cref="SomTrainingParameters"/> instance when <c>Network</c> is a
        /// <see cref="SelfOrganizingMap"/>; otherwise a <see cref="KMeansTrainingParameters"/> instance.
        /// </returns>
        /// <exception cref="System.FormatException">A text box does not contain a valid number.</exception>
        private TrainingParameters GetParameters()
        {
            // Both values are parsed up front so invalid input is reported regardless of the network type.
            // The invariant culture matches the parsing of the learning rates below.
            int epochs     = Int32.Parse(TrainingEpochs.Text, CultureInfo.InvariantCulture);
            int iterations = Int32.Parse(TrainingIterations.Text, CultureInfo.InvariantCulture);

            if (Network is SelfOrganizingMap)
            {
                int    kmax            = TrainingData.Count * epochs;
                double minLearningRate = double.Parse(TrainingLearningRateMin.Text, CultureInfo.InvariantCulture);
                double maxLearningRate = double.Parse(TrainingLearningRateMax.Text, CultureInfo.InvariantCulture);

                // As in the original code, Epochs is not set on the SOM parameters;
                // it only contributes to kmax.
                return new SomTrainingParameters()
                {
                    LearningRate          = new DeclineExponentially(kmax, minLearningRate, maxLearningRate),
                    TirednessMechanism    = GetTiredMechanism(),
                    NeighbourhoodFunction = GetNeighbourhoodFunction(kmax),
                    NumberOfIterations    = iterations,
                    TrainingData          = TrainingData
                };
            }

            return new KMeansTrainingParameters()
            {
                Epochs       = epochs,
                TrainingData = TrainingData
            };
        }
Esempio n. 4
0
        /// <summary>
        /// Applies an Adam-style update to every entry of every weight matrix, using the
        /// gradients in <c>_deltaWeightMatrices</c> and the running moments <c>_m</c> / <c>_v</c>.
        /// </summary>
        /// <param name="trainingParameters">Supplies the learning rate.</param>
        /// <param name="timeStep">
        /// Starting step counter for the bias correction. It is passed by value, so the
        /// increments made here are not visible to the caller.
        /// </param>
        private void UpdateWeightsAdam(TrainingParameters trainingParameters, int timeStep)
        {
            // NOTE(review): _m and _v are single fields shared by all weights and the step counter
            // advances once per weight; textbook Adam keeps moments per parameter and advances the
            // step once per update - confirm this is intended.
            int matrixCount = _weights.Count;

            for (int layer = 0; layer < matrixCount; layer++)
            {
                var weights     = _weights[layer];
                int rowCount    = weights.GetLength(0);
                int columnCount = weights.GetLength(1);

                for (int row = 0; row < rowCount; row++)
                {
                    for (int column = 0; column < columnCount; column++)
                    {
                        timeStep++;

                        var gradient = _deltaWeightMatrices[layer][row, column];

                        // First moment and its bias-corrected value.
                        _m = _Beta1 * _m + (1 - _Beta1) * gradient;
                        double correctedFirstMoment = _m / (1 - Math.Pow(_Beta1, timeStep));

                        // Second moment and its bias-corrected value.
                        _v = _Beta2 * _v + (1 - _Beta2) * Math.Pow(gradient, 2);
                        double correctedSecondMoment = _v / (1 - Math.Pow(_Beta2, timeStep));

                        var step = trainingParameters.learningRate * (correctedFirstMoment / Math.Sqrt(correctedSecondMoment + _epsilon));

                        // The previously applied step is subtracted again together with the new one.
                        weights[row, column] -= step + _previousDeltaWeights[layer][row, column];

                        _previousDeltaWeights[layer][row, column] = step;
                    }
                }
            }
        }
        public void TestEverything()
        {
            // Trains a sentence detector, evaluates it with the SharpNL implementation, then
            // serializes the model to a temporary file and evaluates the same model through
            // the Java OpenNLP implementation.
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream    = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                // expected value first, actual value second
                Assert.AreEqual("en", model.Language);
                Assert.AreEqual(true, model.UseTokenEnd);

                var sMe = new SentenceDetectorME(model);

                // test the SharpNL sentences
                SentenceDetectorMETest.EvalSentences(sMe);

                var sFile = Path.GetTempFileName();

                // Dispose the output stream so the serialized model is flushed and the file handle
                // is released before the file is opened again for reading.
                using (var output = new FileStream(sFile, FileMode.Create)) {
                    model.Serialize(output);
                }

                var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));

                var jMe = new JavaSDME(jModel2);

                // test the Java OpenNLP sentences.
                JavaEvalSentences(jMe);

                // first try?! Yes! ;-)
            }
        }
 /// <summary>
 /// Trains a document categorizer model on the stream returned by <c>CreateSampleStream</c>
 /// using the default training parameters and the language code "x-unspecified".
 /// </summary>
 /// <param name="factory">
 /// The factory to train with; a new <see cref="DocumentCategorizerFactory"/> is used when <c>null</c>.
 /// </param>
 /// <returns>The trained model.</returns>
 private static DocumentCategorizerModel Train(DocumentCategorizerFactory factory = null) {
     var samples    = CreateSampleStream();
     var parameters = TrainingParameters.DefaultParameters();

     if (factory == null) {
         factory = new DocumentCategorizerFactory();
     }

     return DocumentCategorizerME.Train("x-unspecified", samples, parameters, factory);
 }
Esempio n. 7
0
        public void TestPerceptronOnPrepAttachDataWithStepSizeDecrease()
        {
            // Perceptron training on the prep-attach data with a step size decrease.
            var parameters = new TrainingParameters();

            parameters.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
            parameters.Set(Parameters.Cutoff, "1");
            parameters.Set(Parameters.Iterations, "500");
            parameters.Set(Parameters.StepSizeDecrease, "0.06");

            var model = TrainerFactory
                .GetEventTrainer(parameters, null, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // The value expected by the OpenNLP (Java) test is 0.7756870512503095, but per the
            // original author's note the Java test fails as well:
            //
            //   java.lang.AssertionError: expected:<0.7756870512503095> but was:<0.7766773953948998>
            //
            // so, for now, this port asserts its own value instead of the OpenNLP one.
            const double expectedAccuracy = 0.77742015350334237;

            PrepAttachDataUtility.TestModel(model, expectedAccuracy);
        }
        public void TestSimpleTraining()
        {
            // Two samples of category "1" (a b c ...) and three of category "0" (x y z ...).
            IObjectStream <DocumentSample> trainingSamples = new GenericObjectStream <DocumentSample>(
                new DocumentSample("1", new[] { "a", "b", "c", "1", "2" }),
                new DocumentSample("1", new[] { "a", "b", "c", "3", "4" }),
                new DocumentSample("0", new[] { "x", "y", "z" }),
                new DocumentSample("0", new[] { "x", "y", "z", "5", "6" }),
                new DocumentSample("0", new[] { "x", "y", "z", "7", "8" }));

            var trainingParameters = new TrainingParameters();

            trainingParameters.Set(Parameters.Iterations, "100");
            trainingParameters.Set(Parameters.Cutoff, "0");
            trainingParameters.Set(Parameters.Algorithm, Parameters.Algorithms.NaiveBayes);

            var factory = new DocumentCategorizerFactory(
                WhitespaceTokenizer.Instance,
                new [] { new BagOfWordsFeatureGenerator() });

            var categorizer = new DocumentCategorizerME(
                DocumentCategorizerME.Train("x-unspecified", trainingSamples, trainingParameters, factory));

            // "a" only occurs in category "1", "x" only in category "0".
            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // The last entry of the sorted score map should be category "1" because it has the highest score.
            var highestScored = categorizer.SortedScoreMap("a").Last();

            Assert.AreEqual("1", highestScored.Value[0]);
        }
Esempio n. 9
0
        /// <summary>
        /// Determines whether the specified train parameters are valid.
        /// </summary>
        /// <param name="trainParams">The train parameters.</param>
        /// <returns><c>true</c> if the specified train parameters are valid; otherwise, <c>false</c>.</returns>
        /// <remarks>
        /// The parameters are valid when their own <c>IsValid</c> check passes, the algorithm (if any)
        /// is a built-in trainer or has a determinable trainer type, and the data indexer (if any)
        /// is either one-pass or two-pass.
        /// </remarks>
        public static bool IsValid(TrainingParameters trainParams)
        {
            if (!trainParams.IsValid())
            {
                return(false);
            }

            var algorithmName = trainParams.Get(Parameters.Algorithm);

            // A missing algorithm is not an error: GetEventTrainer falls back to GIS in that case.
            // The null check also keeps ContainsKey from being called with a null key.
            if (algorithmName != null &&
                !builtInTrainers.ContainsKey(algorithmName) &&
                GetTrainerType(trainParams) == null)
            {
                return(false);
            }

            var dataIndexer = trainParams.Get(Parameters.DataIndexer);

            if (dataIndexer != null)
            {
                switch (dataIndexer)
                {
                case Parameters.DataIndexers.OnePass:
                case Parameters.DataIndexers.TwoPass:
                    break;

                default:
                    // unknown data indexer name
                    return(false);
                }
            }

            return(true);
        }
Esempio n. 10
0
        /// <summary>
        /// Trains a part-of-speech model from the word/tag samples stored in the given file.
        /// </summary>
        /// <param name="path">Path of the training file; it is opened read-only.</param>
        /// <param name="mt">The model type, which selects the training algorithm.</param>
        /// <returns>The trained <see cref="POSModel"/>.</returns>
        /// <exception cref="System.NotSupportedException">The model type is neither Maxent nor Perceptron.</exception>
        public static POSModel TrainModel(string path, ModelType mt)
        {
            // The using block releases the file handle once training is finished or fails;
            // the original code never closed the stream.
            using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                WordTagSampleStream stream = new WordTagSampleStream(fs);

                TrainingParameters trainParams = new TrainingParameters();

                trainParams.Set(Parameters.Iterations, "100");
                trainParams.Set(Parameters.Cutoff, "0");
                switch (mt)
                {
                case ModelType.Maxent:
                    trainParams.Set(Parameters.Algorithm, "MAXENT");
                    break;

                case ModelType.Perceptron:
                    trainParams.Set(Parameters.Algorithm, "PERCEPTRON");
                    break;

                default:
                    throw new NotSupportedException();
                }

                return(POSTaggerME.Train(TRAINING_LANGUAGE, stream, trainParams, new POSTaggerFactory()));
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Creates and initializes the sequence trainer named by the algorithm parameter.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map passed to the trainer's initialization.</param>
        /// <param name="monitor">
        /// An evaluation monitor handed to the created trainer.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The initialized <see cref="ISequenceTrainer"/> trainer object.</returns>
        /// <exception cref="System.InvalidOperationException">Trainer type couldn't be determined!</exception>
        public static ISequenceTrainer GetSequenceModelTrainer(TrainingParameters parameters, Dictionary <string, string> reportMap, Monitor monitor)
        {
            var algorithm = parameters.Get(Parameters.Algorithm);

            ISequenceTrainer sequenceTrainer = null;

            if (algorithm != null && builtInTrainers.ContainsKey(algorithm))
            {
                sequenceTrainer = CreateBuiltinTrainer <ISequenceTrainer>(algorithm, monitor);
            }

            // Checked independently of the built-in lookup: a custom trainer registered under
            // the same name replaces the built-in instance created above.
            if (algorithm != null && customTrainers.ContainsKey(algorithm))
            {
                sequenceTrainer = CreateCustomTrainer <ISequenceTrainer>(algorithm, monitor);
            }

            if (sequenceTrainer != null)
            {
                sequenceTrainer.Init(parameters, reportMap);
                return(sequenceTrainer);
            }

            throw new InvalidOperationException("Trainer type couldn't be determined!");
        }
        public void testSimpleTraining() {
            // Three samples per category: "1" (a b c ...) and "0" (x y z ...).
            var documents = new[] {
                new DocumentSample("1", new[] {"a", "b", "c"}),
                new DocumentSample("1", new[] {"a", "b", "c", "1", "2"}),
                new DocumentSample("1", new[] {"a", "b", "c", "3", "4"}),
                new DocumentSample("0", new[] {"x", "y", "z"}),
                new DocumentSample("0", new[] {"x", "y", "z", "5", "6"}),
                new DocumentSample("0", new[] {"x", "y", "z", "7", "8"})
            };
            IObjectStream<DocumentSample> trainingSamples = new GenericObjectStream<DocumentSample>(documents);

            var trainingParameters = new TrainingParameters();
            trainingParameters.Set(Parameters.Iterations, "100");
            trainingParameters.Set(Parameters.Cutoff, "0");

            var categorizer = new DocumentCategorizerME(
                DocumentCategorizerME.Train("x-unspecified", trainingSamples, trainingParameters, new DocumentCategorizerFactory()));

            Assert.AreEqual("1", categorizer.GetBestCategory(categorizer.Categorize("a")));
            Assert.AreEqual("0", categorizer.GetBestCategory(categorizer.Categorize("x")));

            // Only the first entry of the sorted score map is inspected; it is expected to be category "1".
            // NOTE(review): the original comment talks about the map's *last* key - confirm which end is intended.
            foreach (var entry in categorizer.SortedScoreMap("a")) {
                Assert.AreEqual("1", entry.Value[0]);
                break;
            }
        }
        //private string _filePath;


        /// <summary>
        /// Creates a network from the given structure, training parameters, data set,
        /// optimization strategy and random distribution, then runs the initialization
        /// steps below in a fixed order.
        /// </summary>
        /// <param name="layerStructure">The layer structure; also assigned to the strategy's <c>LayerStructure</c>.</param>
        /// <param name="trainingParameters">The training parameters stored for later use.</param>
        /// <param name="dataSet">The data set assigned to <c>DataSet</c>.</param>
        /// <param name="strategy">The optimization strategy; must not be <c>null</c> because it is dereferenced here.</param>
        /// <param name="randomDistribution">The random distribution stored in <c>_randomDistribution</c>.</param>
        public ArtificialNeuralNetwork(LayerStructure layerStructure, TrainingParameters trainingParameters,
                                       IDataSet dataSet, IOptimizationStrategy strategy, IInitialRandomDistributionType randomDistribution)
        {
            _layerStructure          = layerStructure;
            _trainingParameters      = trainingParameters;
            DataSet                  = dataSet;
            _strategy                = strategy;
            _strategy.LayerStructure = layerStructure;
            _randomDistribution      = randomDistribution;

            // NOTE(review): the helpers below are not visible here; presumably they depend on the
            // fields assigned above, so the call order must be kept - confirm before reordering.
            CreateDataSets();

            SetWeights();
            InitPreviousDeltaWeights();

            SetBiases();
            InitPreviousDeltaBiases();


            // The weight/bias collections are handed to the strategy by reference.
            _strategy.FetchInitialWeightsAndBiases(ref _weights, ref _biases);

            _strategy.FetchPreviousDeltaWeightsAndBiases(ref _previousDeltaWeights, ref _previousDeltaBiases);

            // The save path is set to the current user's temporary folder.
            _savePath = Path.GetTempPath();
        }
Esempio n. 14
0
        /// <summary>
        /// Trains a parser model, applying the same cutoff and iteration count to each training stage.
        /// </summary>
        /// <param name="monitor">
        /// An evaluation monitor forwarded to the parameter-based overload.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="iterations">The number of training iterations.</param>
        /// <param name="cutoff">The min number of times a feature must be seen.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            int iterations,
            int cutoff)
        {
            var cutoffText     = cutoff.ToString(CultureInfo.InvariantCulture);
            var iterationsText = iterations.ToString(CultureInfo.InvariantCulture);

            var trainingParameters = new TrainingParameters();

            // The dictionary namespace only takes a cutoff.
            trainingParameters.Set("dict", Parameters.Cutoff, cutoffText);

            // Every other namespace takes both the cutoff and the iteration count.
            foreach (var stage in new[] { "tagger", "chunker", "check", "build" })
            {
                trainingParameters.Set(stage, Parameters.Cutoff, cutoffText);
                trainingParameters.Set(stage, Parameters.Iterations, iterationsText);
            }

            return Train(monitor, languageCode, samples, rules, trainingParameters);
        }
Esempio n. 15
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples; the stream is reset between the training stages.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">
        /// The machine learnable parameters; the "build", "tagger", "chunker" and "check"
        /// namespaces are used for the respective stages.
        /// </param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters)
        {
            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary <string, string>();

            // build
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");

            if (!posTaggerParams.Contains(Parameters.BeamSize))
            {
                posTaggerParams.Set(Parameters.BeamSize, "10");
            }

            // Train with the very object the default beam size was applied to. Fetching the
            // namespace a second time would drop the default whenever GetNamespace returns a
            // fresh copy rather than a live view of the parameters.
            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                                             posTaggerParams, new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode,
                                             new ChunkSampleStream(samples),
                                             parameters.GetNamespace("chunker"),
                                             new ParserChunkerFactory());

            samples.Reset();

            // check
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();
            var checkTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return(new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries));
        }
Esempio n. 16
0
        /// <summary>
        /// Trains a network for the selected categories (both training phases followed by an
        /// accuracy check on the test data) and wraps the result in a <see cref="ClassifierFacade"/>.
        /// </summary>
        /// <param name="trainingParameters">The selected categories and the parameters of both training phases.</param>
        /// <returns>A facade over the data provider and the classifier built from the trained network.</returns>
        private ClassifierFacade RunTrainingForSelectedCategoriesImpl(TrainingParameters trainingParameters)
        {
            var selectedCategories = trainingParameters.SelectedCategories.ToArray();
            var learningSet        = _dataProvider.GetLearningSetForCategories(selectedCategories);

            // Configured hidden layers followed by an output layer sized to the number of categories.
            var layerSizes = _globalTrainerConfiguration.HiddenLayers.ToList();
            layerSizes.Add(selectedCategories.Length);

            var trainerConfiguration = new TrainerConfiguration
            {
                Layers            = layerSizes.ToArray(),
                InputsOutputsData = learningSet.TrainingData.ToInputOutputsDataNative(),
            };
            var trainer = new Trainer(trainerConfiguration, _skipPhaseRequest, _guiLogger);

            trainer.RunTraining1(trainingParameters.Training1Parameters);
            trainer.RunTraining2(trainingParameters.Training2Parameters);
            trainer.CheckAccuracy(learningSet.TestData.ToInputOutputsDataNative());

            var classifierConfiguration = new ClassifierConfiguration { Categories = selectedCategories };
            var classifier              = new Classifier(trainer.NeuralNetwork, classifierConfiguration, _guiLogger);

            return new ClassifierFacade(_dataProvider, classifier);
        }
Esempio n. 17
0
        /// <summary>
        /// Applies an Adam-style update to every bias, using the gradients in
        /// <c>_deltaBiasMatrices</c> and the running moments <c>_m</c> / <c>_v</c>.
        /// </summary>
        /// <param name="trainingParameters">Supplies the learning rate.</param>
        /// <param name="timeStep">
        /// Starting step counter for the bias correction. It is passed by value, so the
        /// increments made here are not visible to the caller.
        /// </param>
        private void UpdateBiasesAdam(TrainingParameters trainingParameters, int timeStep)
        {
            // NOTE(review): _m and _v are single fields shared by all biases and the step counter
            // advances once per bias; textbook Adam keeps moments per parameter and advances the
            // step once per update - confirm this is intended.
            int matrixCount = _biases.Count;

            for (int layer = 0; layer < matrixCount; layer++)
            {
                var biases   = _biases[layer];
                int rowCount = biases.GetLength(0);

                for (int row = 0; row < rowCount; row++)
                {
                    timeStep++;

                    var gradient = _deltaBiasMatrices[layer][row];

                    // First moment and its bias-corrected value.
                    _m = _Beta1 * _m + (1 - _Beta1) * gradient;
                    double correctedFirstMoment = _m / (1 - Math.Pow(_Beta1, timeStep));

                    // Second moment and its bias-corrected value.
                    _v = _Beta2 * _v + (1 - _Beta2) * Math.Pow(gradient, 2);
                    double correctedSecondMoment = _v / (1 - Math.Pow(_Beta2, timeStep));

                    var step = trainingParameters.learningRate * (correctedFirstMoment / Math.Sqrt(correctedSecondMoment + _epsilon));

                    // The previously applied step is subtracted again together with the new one.
                    biases[row] -= step + _previousDeltaBiases[layer][row];

                    _previousDeltaBiases[layer][row] = step;
                }
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Creates and initializes the event trainer selected by the algorithm parameter.
        /// </summary>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="reportMap">The report map passed to the trainer's initialization.</param>
        /// <param name="monitor">An evaluation monitor handed to the created trainer.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>
        /// The initialized <see cref="IEventTrainer" />: a GIS trainer when no algorithm is specified,
        /// otherwise the trainer resolved for the algorithm. Returns <c>null</c> when the trainer type
        /// of the parameters is not an event model trainer.
        /// </returns>
        /// <exception cref="System.InvalidOperationException">
        /// Unable to retrieve the trainer from the training parameters.
        /// or
        /// The trainer type has no public constructor taking a single <c>Monitor</c> argument.
        /// </exception>
        public static IEventTrainer GetEventTrainer(TrainingParameters parameters, Dictionary <string, string> reportMap, Monitor monitor)
        {
            var algorithm = parameters.Get(Parameters.Algorithm);

            // No algorithm specified: fall back to GIS.
            if (algorithm == null)
            {
                AbstractEventTrainer defaultTrainer = new GIS(monitor);
                defaultTrainer.Init(parameters, reportMap);
                return(defaultTrainer);
            }

            var trainerKind = GetTrainerType(parameters);

            if (!trainerKind.HasValue || trainerKind.Value != TrainerType.EventModelTrainer)
            {
                return(null);
            }

            var implementation = GetTrainer(algorithm);

            if (implementation == null)
            {
                throw new InvalidOperationException("Unable to retrieve the trainer from the training parameters.");
            }

            // The trainer is instantiated through its (Monitor) constructor.
            var constructor = implementation.GetConstructor(new [] { typeof(Monitor) });

            if (constructor == null)
            {
                throw new InvalidOperationException("The constructor of the trainer must have a standard constructor.");
            }

            var eventTrainer = (IEventTrainer)constructor.Invoke(new object[] { monitor });
            eventTrainer.Init(parameters, reportMap);
            return(eventTrainer);
        }
Esempio n. 19
0
        public void TestOnlyWithNamesWithTypes()
        {
            // Trains a name finder on data that contains only typed names and checks that the
            // first three person spans of a sample sentence are detected.
            using (var file = Tests.OpenFile("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")) {
                var sampleStream = new NameSampleStream(new PlainTextByLineStream(file));

                var param = new TrainingParameters();
                param.Set(Parameters.Iterations, "70");
                param.Set(Parameters.Cutoff, "1");

                var model = NameFinderME.Train(
                    "en",
                    sampleStream,
                    param,
                    new TokenNameFinderFactory(null, new Dictionary <string, object>()));

                var nameFinder = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var sentence = WhitespaceTokenizer.Instance.Tokenize(
                    "Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman Robert Aderholt " +
                    "Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander");

                var names = nameFinder.Find(sentence);

                Assert.AreEqual(new Span(0, 2, "person"), names[0]);
                Assert.AreEqual(new Span(2, 4, "person"), names[1]);
                Assert.AreEqual(new Span(4, 6, "person"), names[2]);

                // Assert.False states the expectation directly and matches the sibling name finder tests.
                Assert.False(HasOtherAsOutcome(model));
            }
        }
Esempio n. 20
0
        public void TestOnlyWithEntitiesWithTypes()
        {
            // Trains a name finder on data that contains only typed entities and checks the
            // organization, location and person spans found in a short sentence.
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(trainingFile));

                var trainingParameters = new TrainingParameters();
                trainingParameters.Set(Parameters.Iterations, "70");
                trainingParameters.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary <string, object>());
                var model   = NameFinderME.Train("en", samples, trainingParameters, factory);
                var finder  = new NameFinderME(model);

                // now test if it can detect the sample sentences
                var tokens = WhitespaceTokenizer.Instance.Tokenize("NATO United States Barack Obama");
                var spans  = finder.Find(tokens);

                Assert.AreEqual(new Span(0, 1, "organization"), spans[0]);
                Assert.AreEqual(new Span(1, 3, "location"), spans[1]);
                Assert.AreEqual(new Span(3, 5, "person"), spans[2]);
                Assert.False(HasOtherAsOutcome(model));
            }
        }
Esempio n. 21
0
        public void TestNameFinderWithMultipleTypes()
        {
            // Trains a name finder on the voa1 data and checks the typed spans found in two
            // pre-tokenized sentences.
            using (var trainingFile = Tests.OpenFile("opennlp/tools/namefind/voa1.train")) {
                var samples = new NameSampleStream(new PlainTextByLineStream(trainingFile));

                var trainingParameters = new TrainingParameters();
                trainingParameters.Set(Parameters.Iterations, "70");
                trainingParameters.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary <string, object>());
                var finder  = new NameFinderME(NameFinderME.Train("en", samples, trainingParameters, factory));

                // First sentence: four names are expected.
                // Per the original note, the Span equality comparison covers the type as well,
                // so no separate assertions on the span types are needed.
                var firstSentence = new [] {
                    "U", ".", "S", ".", "President", "Barack", "Obama", "has",
                    "arrived", "in", "South", "Korea", ",", "where", "he", "is", "expected", "to",
                    "show", "solidarity", "with", "the", "country", "'", "s", "president", "in",
                    "demanding", "North", "Korea", "move", "toward", "ending", "its", "nuclear",
                    "weapons", "programs", "."
                };

                var firstNames = finder.Find(firstSentence);

                Assert.AreEqual(4, firstNames.Length);
                Assert.AreEqual(new Span(0, 4, "location"), firstNames[0]);
                Assert.AreEqual(new Span(5, 7, "person"), firstNames[1]);
                Assert.AreEqual(new Span(10, 12, "location"), firstNames[2]);
                Assert.AreEqual(new Span(28, 30, "location"), firstNames[3]);

                // Second sentence: a person and an organization are expected.
                var secondSentence = new[] {
                    "Scott", "Snyder", "is", "the", "director", "of", "the",
                    "Center", "for", "U", ".", "S", ".", "Korea", "Policy", "."
                };

                var secondNames = finder.Find(secondSentence);

                Assert.AreEqual(2, secondNames.Length);
                Assert.AreEqual(new Span(0, 2, "person"), secondNames[0]);
                Assert.AreEqual(new Span(7, 15, "organization"), secondNames[1]);
            }
        }
Esempio n. 22
0
 /// <summary>
 /// Trains a chunker model with the given parameters. Delegates to the five-argument
 /// overload, passing <c>null</c> as the last argument.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <param name="factory">The chunker factory.</param>
 /// <returns>The trained <see cref="ChunkerModel"/> object.</returns>
 /// <exception cref="System.InvalidOperationException">The trainer was not specified.</exception>
 /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
 public static ChunkerModel Train(
     string languageCode,
     IObjectStream <ChunkSample> samples,
     TrainingParameters parameters,
     ChunkerFactory factory)
     => Train(languageCode, samples, parameters, factory, null);
Esempio n. 23
0
 /// <summary>
 /// Initializes a new instance of the <see cref="ParserCrossEvaluator"/> class,
 /// storing the given arguments for later use.
 /// </summary>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="headRules">The head rules.</param>
 /// <param name="parserType">The parser model type.</param>
 /// <param name="monitors">The evaluation monitors.</param>
 public ParserCrossEvaluator(string languageCode, TrainingParameters parameters, AbstractHeadRules headRules, ParserType parserType, params IEvaluationMonitor <Parse>[] monitors)
 {
     this.languageCode = languageCode;
     this.parameters   = parameters;
     this.headRules    = headRules;
     this.parserType   = parserType;
     this.monitors     = monitors;
 }
Esempio n. 24
0
        public void Setup()
        {
            // Training configuration used by the tests: the GIS max-entropy algorithm,
            // 10 iterations and a feature cutoff of 5.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, GIS.MaxEntropy);
            settings.Set(Parameters.Iterations, "10");
            settings.Set(Parameters.Cutoff, "5");
            mlParams = settings;

            // Register the dummy trainer type under the name "Dummy".
            TrainerFactory.RegisterTrainer("Dummy", typeof(DummyTrainer));
        }
Esempio n. 25
0
 /// <summary>
 /// Trains a parser model with the given parameters.
 /// </summary>
 /// <remarks>
 /// Convenience overload: forwards to the five-argument <c>Train</c> overload,
 /// passing <c>null</c> as its last argument.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="rules">The head rules.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <returns>The trained <see cref="ParserModel"/> object.</returns>
 /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
 public static ParserModel Train(
     string languageCode,
     IObjectStream<Parse> samples,
     AbstractHeadRules rules,
     TrainingParameters parameters)
 {
     var parserModel = Train(languageCode, samples, rules, parameters, null);

     return parserModel;
 }
Esempio n. 26
0
 /// <summary>
 /// Trains a name finder model using the default name type.
 /// </summary>
 /// <remarks>
 /// Forwards to the overload that takes an explicit type, supplying <c>DefaultType</c>.
 /// </remarks>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="samples">The training samples.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="factory">The name finder factory.</param>
 /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
 public static TokenNameFinderModel Train(
     string languageCode,
     IObjectStream<NameSample> samples,
     TrainingParameters parameters,
     TokenNameFinderFactory factory)
 {
     var nameFinderModel = Train(languageCode, DefaultType, samples, parameters, factory);

     return nameFinderModel;
 }
Esempio n. 27
0
 /// <summary>
 /// Trains a sentence detection model with the given parameters.
 /// </summary>
 /// <remarks>
 /// Convenience overload: forwards to the five-argument <c>Train</c> overload,
 /// passing <c>null</c> as its last argument.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="factory">The sentence detector factory.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
 public static SentenceModel Train(
     string languageCode,
     IObjectStream<SentenceSample> samples,
     SentenceDetectorFactory factory,
     TrainingParameters parameters)
 {
     var sentenceModel = Train(languageCode, samples, factory, parameters, null);

     return sentenceModel;
 }
Esempio n. 28
0
 /// <summary>
 /// Trains a document categorizer model with the given parameters.
 /// </summary>
 /// <remarks>
 /// Convenience overload: forwards to the five-argument <c>Train</c> overload,
 /// passing <c>null</c> as its last argument.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <param name="factory">The document categorizer factory.</param>
 /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
 public static DocumentCategorizerModel Train(
     string languageCode,
     IObjectStream<DocumentSample> samples,
     TrainingParameters parameters,
     DocumentCategorizerFactory factory)
 {
     var categorizerModel = Train(languageCode, samples, parameters, factory, null);

     return categorizerModel;
 }
Esempio n. 29
0
        public void Setup()
        {
            // 70 iterations with a feature cutoff of 1 for the chunker used by the tests.
            var trainingSettings = new TrainingParameters();
            trainingSettings.Set(Parameters.Iterations, "70");
            trainingSettings.Set(Parameters.Cutoff, "1");

            // Train an English chunker model on the sample stream and wrap it in a chunker.
            chunker = new ChunkerME(
                ChunkerME.Train("en", CreateSampleStream(), trainingSettings, new ChunkerFactory()));
        }
Esempio n. 30
0
 /// <summary>
 /// Initializes a new instance of the <see cref="TokenNameFinderCrossValidator"/> class.
 /// </summary>
 /// <remarks>
 /// Besides storing the arguments, the constructor creates a default
 /// <see cref="TokenNameFinderFactory"/> and a fresh <c>FMeasure</c> accumulator.
 /// </remarks>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="type"><c>null</c> or an override type for all types in the training data.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="listeners">The listeners.</param>
 public TokenNameFinderCrossValidator(
     string languageCode,
     string type,
     TrainingParameters parameters,
     params IEvaluationMonitor<NameSample>[] listeners)
 {
     // Objects owned by this validator.
     FMeasure = new FMeasure<Span>();
     factory = new TokenNameFinderFactory();

     // Caller-supplied configuration.
     this.listeners = listeners;
     this.parameters = parameters;
     this.type = type;
     this.languageCode = languageCode;
 }
Esempio n. 31
0
 /// <summary>
 /// Trains a tokenizer model from the "/opennlp/tools/tokenize/token.train" test file
 /// using 100 iterations and a cutoff of 0.
 /// </summary>
 /// <returns>The trained <see cref="TokenizerModel"/>.</returns>
 public static TokenizerModel CreateMaxentTokenModel()
 {
     using (var trainingFile = Tests.OpenFile("/opennlp/tools/tokenize/token.train"))
     {
         var tokenSamples = new TokenSampleStream(new PlainTextByLineStream(trainingFile));

         var settings = new TrainingParameters();
         settings.Set(Parameters.Iterations, "100");
         settings.Set(Parameters.Cutoff, "0");

         var tokenizerFactory = new TokenizerFactory("en", null, true);

         return TokenizerME.Train(tokenSamples, tokenizerFactory, settings);
     }
 }
Esempio n. 32
0
        public void Setup()
        {
            // Training configuration used by the tests: MaxEnt algorithm,
            // 10 iterations and a feature cutoff of 5.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);
            settings.Set(Parameters.Iterations, "10");
            settings.Set(Parameters.Cutoff, "5");
            mlParams = settings;

            // Register the dummy trainer type under the name "Dummy".
            TrainerFactory.RegisterTrainer("Dummy", typeof(DummyTrainer));
        }
Esempio n. 33
0
 /// <summary>
 /// Builds a tokenizer model trained on the "/opennlp/tools/tokenize/token.train"
 /// test resource (100 iterations, cutoff 0).
 /// </summary>
 /// <returns>The trained <see cref="TokenizerModel"/>.</returns>
 public static TokenizerModel CreateMaxentTokenModel()
 {
     using (var source = Tests.OpenFile("/opennlp/tools/tokenize/token.train"))
     {
         var lineStream  = new PlainTextByLineStream(source);
         var sampleStream = new TokenSampleStream(lineStream);

         var trainingParams = new TrainingParameters();
         trainingParams.Set(Parameters.Iterations, "100");
         trainingParams.Set(Parameters.Cutoff, "0");

         var model = TokenizerME.Train(sampleStream, new TokenizerFactory("en", null, true), trainingParams);
         return model;
     }
 }
Esempio n. 34
0
        public void Setup()
        {
            // Chunker training settings: 70 iterations, feature cutoff of 1.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "70");
            settings.Set(Parameters.Cutoff, "1");

            // Train on the sample stream and keep a chunker built from the resulting model.
            var trained = ChunkerME.Train("en", CreateSampleStream(), settings, new ChunkerFactory());
            chunker = new ChunkerME(trained);
        }
Esempio n. 35
0
        public void TestNameFinder()
        {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt"))
            {
                var samples = new NameSampleStream(new PlainTextByLineStream(input, "ISO-8859-1"));

                // 70 iterations, feature cutoff of 1.
                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");

                var factory = new TokenNameFinderFactory(null, new Dictionary<string, object>());
                var finder  = new NameFinderME(NameFinderME.Train("en", samples, settings, factory));

                // First sentence: a single one-token name is expected at the start.
                var firstSentence = new[] {
                    "Alisa", "appreciated", "the", "hint", "and",
                    "enjoyed", "a", "delicious", "traditional", "meal."
                };

                var firstNames = finder.Find(firstSentence);

                Assert.AreEqual(1, firstNames.Length);
                Assert.AreEqual(new Span(0, 1, Type), firstNames[0]);

                // Second sentence: a one-token name and a two-token name are expected.
                var secondSentence = new[] { "Hi", "Mike", ",", "it's", "Stefanie", "Schmidt", "." };

                var secondNames = finder.Find(secondSentence);

                Assert.AreEqual(2, secondNames.Length);
                Assert.AreEqual(new Span(1, 2, Type), secondNames[0]);
                Assert.AreEqual(new Span(4, 6, Type), secondNames[1]);
            }
        }
Esempio n. 36
0
        public void TestMaxentOnPrepAttachDataWithParamsDefault()
        {
            // Only the algorithm is set; every other training parameter keeps its default.
            var report   = new Dictionary<string, string>();
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

            var trainedModel = TrainerFactory
                .GetEventTrainer(settings, report, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // Expected value passed to the prep-attach test helper.
            PrepAttachDataUtility.TestModel(trainedModel, 0.8086159940579352d);
        }
Esempio n. 37
0
        public void TestMaxentOnPrepAttachDataWithParams()
        {
            // MaxEnt with the two-pass data indexer and a feature cutoff of 1.
            var report   = new Dictionary<string, string>();
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);
            settings.Set(Parameters.DataIndexer, Parameters.DataIndexers.TwoPass);
            settings.Set(Parameters.Cutoff, "1");

            var trainedModel = TrainerFactory
                .GetEventTrainer(settings, report, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // Expected value passed to the prep-attach test helper.
            PrepAttachDataUtility.TestModel(trainedModel, 0.7997028967566229d);
        }
Esempio n. 38
0
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerCrossValidator"/> class.
        /// </summary>
        /// <remarks>
        /// Besides storing the arguments, the constructor creates a fresh <c>FMeasure</c> accumulator.
        /// </remarks>
        /// <param name="languageCode">The language code.</param>
        /// <param name="parameters">The training parameters.</param>
        /// <param name="factory">The chunker factory.</param>
        /// <param name="listeners">The evaluation listeners.</param>
        public ChunkerCrossValidator(
            string languageCode,
            TrainingParameters parameters,
            ChunkerFactory factory,
            params IEvaluationMonitor<ChunkSample>[] listeners)
        {
            // Caller-supplied configuration.
            this.languageCode = languageCode;
            this.parameters = parameters;
            this.listeners = listeners;
            chunkerFactory = factory;

            // Accumulator owned by this validator.
            FMeasure = new FMeasure<Span>();
        }
        public void TestPerceptronOnPrepAttachDataWithTolerance()
        {
            // Perceptron, cutoff 1, 500 iterations, tolerance 0.0001.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
            settings.Set(Parameters.Cutoff, "1");
            settings.Set(Parameters.Iterations, "500");
            settings.Set(Parameters.Tolerance, "0.0001");

            var trainedModel = TrainerFactory
                .GetEventTrainer(settings, null, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // Expected value passed to the prep-attach test helper.
            PrepAttachDataUtility.TestModel(trainedModel, 0.7677642980935875);
        }
        public void TestPerceptronOnPrepAttachDataWithSkippedAveraging()
        {
            // Perceptron with a cutoff of 1 and skipped averaging switched on.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
            settings.Set(Parameters.Cutoff, "1");
            settings.Set(Parameters.UseSkippedAveraging, "true");

            var trainedModel = TrainerFactory
                .GetEventTrainer(settings, null, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // Expected value passed to the prep-attach test helper.
            PrepAttachDataUtility.TestModel(trainedModel, 0.773706362961129);
        }
        public void TestWithNameEvaluationErrorListener()
        {
            using (var input = Tests.OpenFile("opennlp/tools/namefind/AnnotatedSentences.txt"))
            {
                var samples = new NameSampleStream(new PlainTextByLineStream(input, "ISO-8859-1"));

                // MaxEnt, 70 iterations, feature cutoff of 1.
                var settings = new TrainingParameters();
                settings.Set(Parameters.Iterations, "70");
                settings.Set(Parameters.Cutoff, "1");
                settings.Set(Parameters.Algorithm, Parameters.Algorithms.MaxEnt);

                // Cross-validate with two folds while an error listener is attached.
                var validator = new TokenNameFinderCrossValidator(
                    "en",
                    Type,
                    settings,
                    new NameEvaluationErrorListener());

                validator.Evaluate(samples, 2);

                Assert.NotNull(validator.FMeasure);
            }
        }
Esempio n. 42
0
 /// <summary>
 /// Initializes the scorer by loading the training parameters embedded as a resource
 /// for the given activation method and enzyme.
 /// </summary>
 /// <param name="activationMethod">The activation method; only HCD is accepted.</param>
 /// <param name="ms2DetectorType">Not read by this constructor.</param>
 /// <param name="enzyme">The enzyme; only Trypsin is accepted.</param>
 /// <param name="protocol">Not read by this constructor.</param>
 /// <exception cref="ArgumentException">
 /// The activation method / enzyme combination is anything other than HCD + Trypsin.
 /// </exception>
 public RankScore(ActivationMethod activationMethod, Ms2DetectorType ms2DetectorType, Enzyme enzyme, Protocol protocol)
 {
     var hasParameterFile = activationMethod == ActivationMethod.HCD && enzyme == Enzyme.Trypsin;
     if (!hasParameterFile)
     {
         throw new ArgumentException("No parameter file available for selected arguments.");
     }

     var resourceContent = Properties.Resources.HCD_Trypsin;

     // Copy the resource into an in-memory stream and rewind it so it can be read from the start.
     // NOTE(review): neither the stream nor the writer is disposed here; confirm whether
     // TrainingParameters keeps the stream before adding disposal.
     var buffer = new MemoryStream();
     var bufferWriter = new StreamWriter(buffer);
     bufferWriter.Write(resourceContent);
     bufferWriter.Flush();
     buffer.Position = 0;

     _trainingParameters = new TrainingParameters(buffer);
 }
Esempio n. 43
0
        /// <summary>
        /// Trains an English POS model on the sample stream with 100 iterations and a cutoff of 5.
        /// </summary>
        /// <param name="type">The model type, which selects the training algorithm.</param>
        /// <returns>The trained <see cref="POSModel"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <paramref name="type"/> is neither Maxent nor Perceptron.
        /// </exception>
        internal static POSModel TrainPOSModel(ModelType type = ModelType.Maxent)
        {
            var settings = new TrainingParameters();

            // Map the requested model type onto the algorithm name.
            if (type == ModelType.Maxent)
            {
                settings.Set(Parameters.Algorithm, "MAXENT");
            }
            else if (type == ModelType.Perceptron)
            {
                settings.Set(Parameters.Algorithm, "PERCEPTRON");
            }
            else
            {
                throw new NotSupportedException();
            }

            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "5");

            var model = POSTaggerME.Train("en", CreateSampleStream(), settings, new POSTaggerFactory());
            return model;
        }
        public void TestSentenceDetector() {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {

                // 100 iterations, feature cutoff of 0.
                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                // Expected value goes first so that a failure message reports
                // "expected"/"but was" the right way round.
                Assert.AreEqual("en", model.Language);
                Assert.AreEqual(true, model.UseTokenEnd);

                // Run the shared sentence checks against a detector built from the trained model.
                EvalSentences(new SentenceDetectorME(model));
            }
        }
Esempio n. 45
0
        /// <summary>
        /// Trains a small tokenizer model from six hand-written samples
        /// (100 iterations, cutoff 0).
        /// </summary>
        /// <returns>The trained <see cref="TokenizerModel"/>.</returns>
        public static TokenizerModel CreateSimpleMaxentTokenModel()
        {
            // Each word appears once on its own and once followed by a comma token.
            var trainingSamples = new List<TokenSample>();
            trainingSamples.Add(new TokenSample("year", new[] {new Span(0, 4)}));
            trainingSamples.Add(new TokenSample("year,", new[] {new Span(0, 4), new Span(4, 5)}));
            trainingSamples.Add(new TokenSample("it,", new[] {new Span(0, 2), new Span(2, 3)}));
            trainingSamples.Add(new TokenSample("it", new[] {new Span(0, 2)}));
            trainingSamples.Add(new TokenSample("yes", new[] {new Span(0, 3)}));
            trainingSamples.Add(new TokenSample("yes,", new[] {new Span(0, 3), new Span(3, 4)}));

            var settings = new TrainingParameters();
            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "0");

            var sampleStream = new CollectionObjectStream<TokenSample>(trainingSamples);
            var tokenizerFactory = new TokenizerFactory("en", null, true);

            return TokenizerME.Train(sampleStream, tokenizerFactory, settings);
        }
        public void AbbreviationDefaultBehaviorTest()
        {
            // Dutch text used both for training and for detection; one sentence per line.
            var text =
                "Test E-mail met zowel letsel als 12. Toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel met een tel. 011-4441444 erin." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine + Environment.NewLine +

                "Dit is een 2e regel met een tel. 033-1333123 erin!" + Environment.NewLine +
                "Test E-mail met zowel winst als 12. toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel!" + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine;

            // Dictionary handed to the sentence detector factory.
            var abbreviations = new SharpNL.Dictionary.Dictionary(false) {
                {"12. Toedracht"},
                {"Tel."},
            };

            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, "MAXENT");
            settings.Set(Parameters.TrainerType, "Event");
            settings.Set(Parameters.Iterations, "100");
            settings.Set(Parameters.Cutoff, "5");

            var sentenceEndings = new[] { '.', '?', '!' };
            var detectorFactory = new SentenceDetectorFactory("nl", true, abbreviations, sentenceEndings);
            var reader = new StringReader(text);
            var sampleStream = new SentenceSampleStream(new PlainTextByLineStream(reader));

            var trainedModel = SentenceDetectorME.Train("nl", sampleStream, detectorFactory, settings);
            var detector = new SentenceDetectorME(trainedModel);

            var detected = detector.SentDetect(text);

            // The expected sentences are the non-empty lines of the input text.
            var expectedLines = text.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

            Assert.AreEqual(8, detected.Length);
            for (var index = 0; index < detected.Length; index++)
            {
                Assert.AreEqual(expectedLines[index], detected[index]);
            }
        }
Esempio n. 47
0
        public void Setup()
        {
            // Same settings for both implementations: 70 iterations, cutoff 1.
            var sharpSettings = new TrainingParameters();
            sharpSettings.Set(Parameters.Iterations, "70");
            sharpSettings.Set(Parameters.Cutoff, "1");

            var javaSettings = new opennlp.tools.util.TrainingParameters();
            javaSettings.put("Iterations", "70");
            javaSettings.put("Cutoff", "1");

            // Train one chunker model with each implementation.
            var sharpModel = ChunkerME.Train(
                "en",
                ChunkerMETest.CreateSampleStream(),
                sharpSettings,
                new ChunkerFactory());

            var javaModel = opennlp.tools.chunker.ChunkerME.train(
                "en",
                JavaSampleStream(),
                javaSettings,
                new opennlp.tools.chunker.ChunkerFactory());

            Assert.NotNull(sharpModel);
            Assert.NotNull(javaModel);

            sChunker = new ChunkerME(sharpModel);
            jChunker = new opennlp.tools.chunker.ChunkerME(javaModel);
        }
        public void TestPerceptronOnPrepAttachDataWithStepSizeDecrease()
        {
            // Perceptron, cutoff 1, 500 iterations, step size decrease of 0.06.
            var settings = new TrainingParameters();
            settings.Set(Parameters.Algorithm, Parameters.Algorithms.Perceptron);
            settings.Set(Parameters.Cutoff, "1");
            settings.Set(Parameters.Iterations, "500");
            settings.Set(Parameters.StepSizeDecrease, "0.06");

            var trainedModel = TrainerFactory
                .GetEventTrainer(settings, null, null)
                .Train(PrepAttachDataUtility.CreateTrainingStream());

            // The OpenNLP reference value (0.7756870512503095) is not asserted here.
            // The original author recorded that the Java test fails too, with
            //   java.lang.AssertionError: expected:<0.7756870512503095> but was:<0.7766773953948998>
            // so this port's own result is used as the expected value instead.
            PrepAttachDataUtility.TestModel(trainedModel, 0.77742015350334237);
        }
Esempio n. 49
0
 /// <summary>
 /// Trains a name finder model using the default name type.
 /// </summary>
 /// <remarks>
 /// Forwards to the overload that takes an explicit type, supplying <c>DefaultType</c>.
 /// </remarks>
 /// <param name="languageCode">The language of the training data.</param>
 /// <param name="samples">The training samples.</param>
 /// <param name="parameters">The machine learning train parameters.</param>
 /// <param name="factory">The name finder factory.</param>
 /// <param name="monitor">
 /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
 /// This argument can be a <c>null</c> value.
 /// </param>
 /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
 public static TokenNameFinderModel Train(
     string languageCode,
     IObjectStream<NameSample> samples,
     TrainingParameters parameters,
     TokenNameFinderFactory factory,
     Monitor monitor)
 {
     var nameFinderModel = Train(languageCode, DefaultType, samples, parameters, factory, monitor);

     return nameFinderModel;
 }
Esempio n. 50
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <remarks>
        /// Training runs in sequence over the same sample stream, which is reset between steps:
        /// dictionary, build model, POS tagger, chunker and check model. The tagger, chunker,
        /// build and check steps each read their settings from the matching namespace of
        /// <paramref name="parameters"/> ("tagger", "chunker", "build", "check").
        /// </remarks>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream<Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters) {

            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary<string, string>();

            // build
            var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary<string, string>();
            var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");
            if (!posTaggerParams.Contains(Parameters.BeamSize)) {
                posTaggerParams.Set(Parameters.BeamSize, "10");
            }

            // Pass the instance that received the default beam size. Calling
            // GetNamespace("tagger") again here could hand the trainer a different
            // object on which the default above was never set.
            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                posTaggerParams, new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory());

            samples.Reset();

            // check
            var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary<string, string>();
            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);
            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries);
        }
Esempio n. 51
0
        /// <summary>
        /// Trains a document categorizer model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The document categorizer factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
        public static DocumentCategorizerModel Train(
            string languageCode,
            IObjectStream<DocumentSample> samples,
            TrainingParameters parameters,
            DocumentCategorizerFactory factory,
            Monitor monitor)
        {
            // Passed to the trainer factory and then stored in the resulting model.
            var manifest = new Dictionary<string, string>();

            // Events are produced from the samples using the factory's feature generators.
            var events = new DocumentCategorizerEventStream(samples, factory.FeatureGenerators);

            var trainedModel = TrainerFactory
                .GetEventTrainer(parameters, manifest, monitor)
                .Train(events);

            return new DocumentCategorizerModel(languageCode, trainedModel, manifest, factory);
        }
Esempio n. 52
0
        /// <summary>
        /// Trains a tree-insert parser model with the given parameters.
        /// </summary>
        /// <remarks>
        /// Training runs in sequence over the same sample stream, which is reset between steps:
        /// dictionary, POS tagger, chunker, build model, check model and attach model.
        /// Each step after the dictionary reads its settings from the matching namespace of
        /// <paramref name="parameters"/> ("tagger", "chunker", "build", "check", "attach").
        /// </remarks>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream<Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters,
            Monitor monitor)
        {
            var manifest = new Dictionary<string, string>();

#if DEBUG
            System.Diagnostics.Debug.Print("Building dictionary");
#endif

            var wordDictionary = BuildDictionary(samples, rules, parameters);
            samples.Reset();

            // Step 1: part-of-speech tagger.
            var taggerModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(),
                monitor);
            samples.Reset();

            // Step 2: chunker.
            var chunkerModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ParserChunkerFactory(),
                monitor);
            samples.Reset();

            // Step 3: build model (the only event stream that uses the dictionary).
#if DEBUG
            System.Diagnostics.Debug.Print("Training builder");
#endif
            var buildEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, wordDictionary);
            var buildReport = new Dictionary<string, string>();
            var buildModel = TrainerFactory
                .GetEventTrainer(parameters.GetNamespace("build"), buildReport, monitor)
                .Train(buildEvents);

            Chunking.Parser.MergeReportIntoManifest(manifest, buildReport, "build");
            samples.Reset();

            // Step 4: check model.
#if DEBUG
            System.Diagnostics.Debug.Print("Training checker");
#endif
            var checkEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReport = new Dictionary<string, string>();
            var checkModel = TrainerFactory
                .GetEventTrainer(parameters.GetNamespace("check"), checkReport, monitor)
                .Train(checkEvents);

            Chunking.Parser.MergeReportIntoManifest(manifest, checkReport, "check");
            samples.Reset();

            // Step 5: attach model.
#if DEBUG
            System.Diagnostics.Debug.Print("Training attacher");
#endif
            var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReport = new Dictionary<string, string>();
            var attachModel = TrainerFactory
                .GetEventTrainer(parameters.GetNamespace("attach"), attachReport, monitor)
                .Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifest, attachReport, "attach");

            return new ParserModel(
                languageCode,
                buildModel,
                checkModel,
                attachModel,
                taggerModel,
                chunkerModel,
                rules,
                ParserType.TreeInsert,
                manifest);
        }
Esempio n. 53
0
        /// <summary>
        /// Trains a parser model, building the training parameters from an iteration count and a cutoff.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="iterations">The number of training iterations.</param>
        /// <param name="cutoff">The minimum number of times a feature must be seen.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(string languageCode, IObjectStream<Parse> samples, AbstractHeadRules rules, int iterations, int cutoff, Monitor monitor) {

            // Both settings are stored as culture-invariant strings.
            var cutoffValue = cutoff.ToString(CultureInfo.InvariantCulture);
            var iterationsValue = iterations.ToString(CultureInfo.InvariantCulture);

            var settings = new TrainingParameters();

            // The "dict" namespace only receives a cutoff.
            settings.Set("dict", Parameters.Cutoff, cutoffValue);

            // The remaining namespaces all receive the same cutoff and iteration count.
            foreach (var ns in new[] { "tagger", "chunker", "check", "build" }) {
                settings.Set(ns, Parameters.Cutoff, cutoffValue);
                settings.Set(ns, Parameters.Iterations, iterationsValue);
            }

            return Train(languageCode, samples, rules, settings, monitor);
        }
Esempio n. 54
0
        /// <summary>
        /// Trains a parser model with the given parameters, without a training monitor.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(string languageCode, IObjectStream<Parse> samples, AbstractHeadRules rules, TrainingParameters parameters) {
            // Forward to the five-argument overload, passing null in the monitor position.
            var model = Train(languageCode, samples, rules, parameters, null);
            return model;
        }
Esempio n. 55
0
        /// <summary>
        /// Trains a Part of Speech model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The POS tagger factory; it supplies the context generator and is stored in the resulting model.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="POSModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static POSModel Train(string languageCode, IObjectStream<POSSample> samples, TrainingParameters parameters, POSTaggerFactory factory, Monitor monitor) {

            // NOTE: Parameters.BeamSize is not read by this method.

            var generator = factory.GetPOSContextGenerator();
            var manifest = new Dictionary<string, string>();

            switch (TrainerFactory.GetTrainerType(parameters)) {
                case TrainerType.EventModelTrainer: {
                    // Event based training: the samples are exposed as an event stream.
                    var events = new POSSampleEventStream(samples, generator);
                    var model = TrainerFactory
                        .GetEventTrainer(parameters, manifest, monitor)
                        .Train(events);

                    return new POSModel(languageCode, model, manifest, factory);
                }
                case TrainerType.EventModelSequenceTrainer: {
                    // Sequence training that still yields an event model.
                    var sequences = new POSSampleSequenceStream(samples, generator);
                    var model = TrainerFactory
                        .GetEventModelSequenceTrainer(parameters, manifest, monitor)
                        .Train(sequences);

                    return new POSModel(languageCode, model, manifest, factory);
                }
                case TrainerType.SequenceTrainer: {
                    var sequenceTrainer = TrainerFactory.GetSequenceModelTrainer(parameters, manifest, monitor);

                    // TODO: This will probably cause issue, since the feature generator uses the outcomes array
                    var sequences = new POSSampleSequenceStream(samples, generator);
                    var model = sequenceTrainer.Train(sequences);

                    return new POSModel(languageCode, model, manifest, factory);
                }
                default:
                    throw new NotSupportedException("Trainer type is not supported.");
            }
        }
Esempio n. 56
0
 /// <summary>
 /// Trains a Part of Speech model with the given parameters, without a training monitor.
 /// </summary>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <param name="factory">The POS tagger factory.</param>
 /// <returns>The trained <see cref="POSModel"/> object.</returns>
 /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
 public static POSModel Train(
     string languageCode,
     IObjectStream<POSSample> samples,
     TrainingParameters parameters,
     POSTaggerFactory factory) {

     // Delegate to the monitor-aware overload with no monitor attached.
     return Train(languageCode, samples, parameters, factory, monitor: null);
 }
Esempio n. 57
0
        /// <summary>
        /// Trains a sentence detector from the sample sentences file, evaluates it with the SharpNL
        /// detector, then serializes the model to a temporary file and evaluates that file through
        /// the Java OpenNLP sentence detector.
        /// </summary>
        public void TestEverything() {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {

                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);
                // Expected value goes first so a failure message reports the values correctly.
                Assert.AreEqual(true, model.UseTokenEnd);

                var sMe = new SentenceDetectorME(model);

                // test the SharpNL sentences
                SentenceDetectorMETest.EvalSentences(sMe);

                var sFile = Path.GetTempFileName();

                // Dispose the writer before the file is reopened below, so that all buffered
                // data is flushed and the write handle is released.
                using (var output = new FileStream(sFile, FileMode.Create)) {
                    model.Serialize(output);
                }

                var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));

                var jMe = new JavaSDME(jModel2);

                // test the Java OpenNLP sentences.
                JavaEvalSentences(jMe);

                // first try?! Yes! ;-)

            }
        }
Esempio n. 58
0
        /// <summary>
        /// Trains a document categorizer model with the given parameters, without a training monitor.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="factory">The document categorizer factory.</param>
        /// <returns>The trained <see cref="DocumentCategorizerModel"/> model.</returns>
        public static DocumentCategorizerModel Train(string languageCode, IObjectStream<DocumentSample> samples, TrainingParameters parameters, DocumentCategorizerFactory factory) {
            // Forward to the five-argument overload, passing null as the last argument.
            var trained = Train(languageCode, samples, parameters, factory, null);
            return trained;
        }
Esempio n. 59
0
        /// <summary>
        /// Trains a name finder model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        /// <exception cref="System.InvalidOperationException">The trainer type is not one of the handled values.</exception>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory, Monitor monitor) {
            // Read up front; only the maxent based model constructor below receives it.
            var beam = parameters.Get(Parameters.BeamSize, DefaultBeamSize);
            var manifest = new Dictionary<string, string>();

            // At most one of these two is assigned by the switch below.
            IMaxentModel maxentModel = null;
            ML.Model.ISequenceClassificationModel<string> sequenceModel = null;

            switch (TrainerFactory.GetTrainerType(parameters)) {
                case TrainerType.EventModelTrainer: {
                    var contextGenerator = factory.CreateContextGenerator();
                    var codec = factory.CreateSequenceCodec();
                    var events = new NameFinderEventStream(samples, type, contextGenerator, codec);

                    maxentModel = TrainerFactory
                        .GetEventTrainer(parameters, manifest, monitor)
                        .Train(events);
                    break;
                }
                case TrainerType.EventModelSequenceTrainer: {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                    maxentModel = TrainerFactory
                        .GetEventModelSequenceTrainer(parameters, manifest, monitor)
                        .Train(sequences);
                    break;
                }
                case TrainerType.SequenceTrainer: {
                    var sequences = new NameSampleSequenceStream(samples, factory.CreateContextGenerator());

                    sequenceModel = TrainerFactory
                        .GetSequenceModelTrainer(parameters, manifest, monitor)
                        .Train(sequences);
                    break;
                }
                default:
                    throw new InvalidOperationException("Unexpected trainer type!");
            }

            // Without a sequence model, build the model from the maxent model and the beam size.
            if (seqModelIsMissing(sequenceModel)) {
                return new TokenNameFinderModel(
                    languageCode,
                    maxentModel,
                    beam,
                    factory.FeatureGenerator,
                    factory.Resources,
                    manifest,
                    factory.SequenceCodec,
                    factory);
            }

            return new TokenNameFinderModel(
                languageCode,
                sequenceModel,
                factory.FeatureGenerator,
                factory.Resources,
                manifest,
                factory.SequenceCodec,
                factory);
        }

        /// <summary>
        /// Tells whether no sequence classification model was produced.
        /// </summary>
        /// <param name="model">The sequence model to inspect.</param>
        /// <returns><c>true</c> if <paramref name="model"/> is <c>null</c>; otherwise, <c>false</c>.</returns>
        private static bool seqModelIsMissing(ML.Model.ISequenceClassificationModel<string> model) {
            return model == null;
        }
Esempio n. 60
0
        /// <summary>
        /// Trains a name finder model with the given parameters, without a training monitor.
        /// </summary>
        /// <param name="languageCode">The language of the training data.</param>
        /// <param name="type">Overrides the type parameter in the provided samples. This value can be null.</param>
        /// <param name="samples">The training samples.</param>
        /// <param name="parameters">The machine learning train parameters.</param>
        /// <param name="factory">The name finder factory.</param>
        /// <returns>The newly trained <see cref="TokenNameFinderModel"/>.</returns>
        public static TokenNameFinderModel Train(string languageCode, string type, IObjectStream<NameSample> samples, TrainingParameters parameters, TokenNameFinderFactory factory) {
            // Delegate to the monitor-aware overload with no monitor attached.
            return Train(languageCode, type, samples, parameters, factory, monitor: null);
        }