Ejemplo n.º 1
0
        public void AbbreviationDefaultBehaviorTest() {

            var samples =
                "Test E-mail met zowel letsel als 12. Toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel met een tel. 011-4441444 erin." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine + Environment.NewLine +

                "Dit is een 2e regel met een tel. 033-1333123 erin!" + Environment.NewLine +
                "Test E-mail met zowel winst als 12. toedracht in het onderwerp." + Environment.NewLine +
                "Dit is een 2e regel!" + Environment.NewLine +
                "Dit is een 2e regel." + Environment.NewLine;

            var stringsToIgnoreDictionary = new SharpNL.Dictionary.Dictionary(false) {
                {"12. Toedracht"},
                {"Tel."},
            };

            var trainingParameters = new TrainingParameters();

            trainingParameters.Set(Parameters.Algorithm, "MAXENT");
            trainingParameters.Set(Parameters.TrainerType, "Event");
            trainingParameters.Set(Parameters.Iterations, "100");
            trainingParameters.Set(Parameters.Cutoff, "5");

            char[] eos = { '.', '?', '!' };
            var sdFactory = new SentenceDetectorFactory("nl", true, stringsToIgnoreDictionary, eos);
            var stringReader = new StringReader(samples);
            var stream = new SentenceSampleStream(new PlainTextByLineStream(stringReader));

            var sentenceModel = SentenceDetectorME.Train("nl", stream, sdFactory, trainingParameters);
            var sentenceDetectorMe = new SentenceDetectorME(sentenceModel);

            var sentences = sentenceDetectorMe.SentDetect(samples);
            var expected = samples.Split(new []{ Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);


            Assert.AreEqual(8, sentences.Length);
            for (var i = 0; i < sentences.Length; i++)
                Assert.AreEqual(expected[i], sentences[i]);
            
        }
Ejemplo n.º 2
0
        internal static void EvalSentences(SentenceDetectorME sentDetect) {
            const string sampleSentences1 = "This is a test. There are many tests, this is the second.";
            var sents = sentDetect.SentDetect(sampleSentences1);
            Assert.AreEqual(sents.Length, 2);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "There are many tests, this is the second.");
            var probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);

            const string sampleSentences2 = "This is a test. There are many tests, this is the second";
            sents = sentDetect.SentDetect(sampleSentences2);
            Assert.AreEqual(sents.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "There are many tests, this is the second");

            const string sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";
            sents = sentDetect.SentDetect(sampleSentences3);
            Assert.AreEqual(sents.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(sents[0], "This is a \"test\".");
            Assert.AreEqual(sents[1], "He said \"There are many tests, this is the second.\"");

            const string sampleSentences4 = "This is a \"test\". I said \"This is a test.\"  Any questions?";
            sents = sentDetect.SentDetect(sampleSentences4);
            Assert.AreEqual(sents.Length, 3);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 3);
            Assert.AreEqual(sents[0], "This is a \"test\".");
            Assert.AreEqual(sents[1], "I said \"This is a test.\"");
            Assert.AreEqual(sents[2], "Any questions?");

            const string sampleSentences5 = "This is a one sentence test space at the end.    ";
            sents = sentDetect.SentDetect(sampleSentences5);
            Assert.AreEqual(1, sentDetect.GetSentenceProbabilities().Length);
            Assert.AreEqual(sents[0], "This is a one sentence test space at the end.");

            const string sampleSentences6 = "This is a one sentences test with tab at the end.            ";
            sents = sentDetect.SentDetect(sampleSentences6);
            Assert.AreEqual(sents[0], "This is a one sentences test with tab at the end.");

            const string sampleSentences7 = "This is a test.    With spaces between the two sentences.";
            sents = sentDetect.SentDetect(sampleSentences7);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "With spaces between the two sentences.");

            const string sampleSentences9 = "";
            sents = sentDetect.SentDetect(sampleSentences9);
            Assert.AreEqual(0, sents.Length);

            const string sampleSentences10 = "               "; // whitespaces and tabs
            sents = sentDetect.SentDetect(sampleSentences10);
            Assert.AreEqual(0, sents.Length);

            const string sampleSentences11 = "This is test sentence without a dot at the end and spaces          ";
            sents = sentDetect.SentDetect(sampleSentences11);
            Assert.AreEqual(sents[0], "This is test sentence without a dot at the end and spaces");
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(1, probs.Length);

            const string sampleSentence12 = "    This is a test.";
            sents = sentDetect.SentDetect(sampleSentence12);
            Assert.AreEqual(sents[0], "This is a test.");

            const string sampleSentence13 = " This is a test";
            sents = sentDetect.SentDetect(sampleSentence13);
            Assert.AreEqual(sents[0], "This is a test");

            // Test that sentPosDetect also works
            var pos = sentDetect.SentPosDetect(sampleSentences2);
            Assert.AreEqual(pos.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(new Span(0, 15), pos[0]);
            Assert.AreEqual(new Span(16, 56), pos[1]);
        }