Ejemplo n.º 1
0
 /// <summary>
 /// Trains a sentence detection model without an evaluation monitor.
 /// </summary>
 /// <remarks>
 /// Forwards to the five-argument overload, passing <c>null</c> as the monitor.
 /// </remarks>
 /// <param name="languageCode">The language code.</param>
 /// <param name="samples">The data samples.</param>
 /// <param name="factory">The sentence detector factory.</param>
 /// <param name="parameters">The machine learnable parameters.</param>
 /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
 public static SentenceModel Train(
     string languageCode,
     IObjectStream <SentenceSample> samples,
     SentenceDetectorFactory factory,
     TrainingParameters parameters)
 {
     // No monitor is supplied here; the overload receives null in its place.
     SentenceModel trained = Train(languageCode, samples, factory, parameters, null);

     return trained;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains an English sentence detector from the bundled sample file and
        /// checks the resulting model's language, its token-end flag and its
        /// detection output (through <c>EvalSentences</c>).
        /// </summary>
        public void TestSentenceDetector() {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {

                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);

                // Expected value goes first so a failure message reports the
                // expected/actual pair the right way round.
                Assert.AreEqual(true, model.UseTokenEnd);

                EvalSentences(new SentenceDetectorME(model));
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a Dutch sentence detector on a small in-memory sample, with an
        /// abbreviation dictionary, and verifies that detecting sentences in the
        /// same text yields exactly the eight non-empty input lines.
        /// </summary>
        public void AbbreviationDefaultBehaviorTest() {

            var samples =
                "Test E-mail met zowel letsel als 12. Toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel met een tel. 011-4441444 erin." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine + Environment.NewLine +

                "Dit is een 2e regel met een tel. 033-1333123 erin!" + Environment.NewLine +
                "Test E-mail met zowel winst als 12. toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel!" + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine;

            // Entries handed to the factory as its abbreviation dictionary.
            var stringsToIgnoreDictionary = new SharpNL.Dictionary.Dictionary(false) {
                {"12. Toedracht"},
                {"Tel."},
            };

            var trainingParameters = new TrainingParameters();

            trainingParameters.Set(Parameters.Algorithm, "MAXENT");
            trainingParameters.Set(Parameters.TrainerType, "Event");
            trainingParameters.Set(Parameters.Iterations, "100");
            trainingParameters.Set(Parameters.Cutoff, "5");

            char[] eos = { '.', '?', '!' };
            var sdFactory = new SentenceDetectorFactory("nl", true, stringsToIgnoreDictionary, eos);

            // The reader is disposed deterministically once the test is done with it.
            using (var stringReader = new StringReader(samples)) {
                var stream = new SentenceSampleStream(new PlainTextByLineStream(stringReader));

                var sentenceModel = SentenceDetectorME.Train("nl", stream, sdFactory, trainingParameters);
                var sentenceDetectorMe = new SentenceDetectorME(sentenceModel);

                var sentences = sentenceDetectorMe.SentDetect(samples);

                // The blank line in the sample is dropped, leaving one expected entry per sentence.
                var expected = samples.Split(new []{ Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

                Assert.AreEqual(8, sentences.Length);
                for (var i = 0; i < sentences.Length; i++) {
                    Assert.AreEqual(expected[i], sentences[i]);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains a sentence detection model from the given samples, factory and
        /// training parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="factory">The sentence detector factory; it supplies the context generator and the end-of-sentence scanner.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
        public static SentenceModel Train(string languageCode, IObjectStream <SentenceSample> samples, SentenceDetectorFactory factory, TrainingParameters parameters, Monitor monitor)
        {
            // Shared between the trainer and the resulting model.
            var manifest = new Dictionary <string, string>();

            // TODO: Fix the EventStream to throw exceptions when training goes wrong
            var contextGenerator = factory.GetContextGenerator();
            var endOfSentenceScanner = factory.GetEndOfSentenceScanner();
            var events = new SentenceEventStream(samples, contextGenerator, endOfSentenceScanner);

            var trained = TrainerFactory
                          .GetEventTrainer(parameters, manifest, monitor)
                          .Train(events);

            return new SentenceModel(languageCode, trained, manifest, factory);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Initializes a new <see cref="SentenceModel"/> around an already trained maxent model.
 /// </summary>
 /// <param name="languageCode">The language code; forwarded to the base constructor.</param>
 /// <param name="sentModel">The maxent model, stored in the artifact map under <c>EntryName</c>.</param>
 /// <param name="manifestInfoEntries">The manifest entries; forwarded to the base constructor.</param>
 /// <param name="sdFactory">The sentence detector factory; forwarded to the base constructor.</param>
 public SentenceModel(string languageCode, IMaxentModel sentModel, Dictionary <string, string> manifestInfoEntries,
                      SentenceDetectorFactory sdFactory) : base(ComponentName, languageCode, manifestInfoEntries, sdFactory)
 {
     // The model must be registered before the artifact map is checked.
     artifactMap.Add(EntryName, sentModel);
     CheckArtifactMap();
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Initializes a new <see cref="SentenceModel"/> around an already trained maxent model.
 /// </summary>
 /// <param name="languageCode">The language code; forwarded to the base constructor.</param>
 /// <param name="sentModel">The maxent model, stored in the artifact map under <c>EntryName</c>.</param>
 /// <param name="manifestInfoEntries">The manifest entries; forwarded to the base constructor.</param>
 /// <param name="sdFactory">The sentence detector factory; forwarded to the base constructor.</param>
 public SentenceModel(string languageCode, IMaxentModel sentModel, Dictionary<string, string> manifestInfoEntries,
     SentenceDetectorFactory sdFactory) : base(ComponentName, languageCode, manifestInfoEntries, sdFactory) {
     // The model must be registered before the artifact map is checked.
     artifactMap.Add(EntryName, sentModel);
     CheckArtifactMap();
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Round-trip compatibility test: trains a model with SharpNL, evaluates it,
        /// serializes it to a temporary file and then loads that file with the Java
        /// OpenNLP implementation to evaluate the same sentences there.
        /// </summary>
        public void TestEverything() {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {

                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);

                // Expected value goes first so a failure message reads correctly.
                Assert.AreEqual(true, model.UseTokenEnd);

                var sMe = new SentenceDetectorME(model);
                
                // test the SharpNL sentences
                SentenceDetectorMETest.EvalSentences(sMe);

                var sFile = Path.GetTempFileName();

                // Dispose the output stream before the file is reopened below, so the
                // serialized bytes are flushed and the file handle is released.
                using (var output = new FileStream(sFile, FileMode.Create)) {
                    model.Serialize(output);
                }

                var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));

                var jMe = new JavaSDME(jModel2);

                // test the Java OpenNLP sentences.
                JavaEvalSentences(jMe);

                // first try?! Yes! ;-)

            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Trains a sentence detection model from the given samples, factory and
        /// training parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="factory">The sentence detector factory; it supplies the context generator and the end-of-sentence scanner.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to the messages during the training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
        public static SentenceModel Train(string languageCode, IObjectStream<SentenceSample> samples, SentenceDetectorFactory factory, TrainingParameters parameters, Monitor monitor) {

            // Shared between the trainer and the resulting model.
            var manifest = new Dictionary<string, string>();

            // TODO: Fix the EventStream to throw exceptions when training goes wrong
            var contextGenerator = factory.GetContextGenerator();
            var endOfSentenceScanner = factory.GetEndOfSentenceScanner();
            var events = new SentenceEventStream(samples, contextGenerator, endOfSentenceScanner);

            var trained = TrainerFactory
                .GetEventTrainer(parameters, manifest, monitor)
                .Train(events);

            return new SentenceModel(languageCode, trained, manifest, factory);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Trains a sentence detection model without an evaluation monitor.
        /// </summary>
        /// <remarks>
        /// Forwards to the five-argument overload, passing <c>null</c> as the monitor.
        /// </remarks>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="factory">The sentence detector factory.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
        public static SentenceModel Train(
            string languageCode,
            IObjectStream<SentenceSample> samples,
            SentenceDetectorFactory factory,
            TrainingParameters parameters) {

            // No monitor is supplied here; the overload receives null in its place.
            SentenceModel trained = Train(languageCode, samples, factory, parameters, null);

            return trained;
        }
 /// <summary>
 /// Trains an English ("en") sentence model with the given factory, using the
 /// stream from <c>CreateSampleStream</c> and the default training parameters.
 /// </summary>
 /// <param name="factory">The sentence detector factory to train with.</param>
 /// <returns>The trained <see cref="SentenceModel"/> object.</returns>
 private static SentenceModel Train(SentenceDetectorFactory factory) {
     var sampleStream = CreateSampleStream();
     var defaults = TrainingParameters.DefaultParameters();

     return SentenceDetectorME.Train("en", sampleStream, factory, defaults);
 }