예제 #1
0
        /// <summary>
        /// Splits the input content into individual sentences.
        /// </summary>
        /// <param name="contents">Content to split into sentences</param>
        /// <returns>The sentences returned by a <c>SentenceDetectorME</c> built on <c>Model</c>.</returns>
        public static IEnumerable <string> ExtractSentences(string contents)
        {
            // A new detector is created for every call on top of the existing Model member.
            return new SentenceDetectorME(Model).sentDetect(contents);
        }
예제 #2
0
        /// <summary>
        /// Splits every line of each *.txt file found in ..\..\..\ into sentences and writes
        /// one sentence per line to ..\..\..\output\Html.txt.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string[] inputFiles = Directory.GetFiles(@"..\..\..\", "*.txt");

            using (StreamWriter sw = new StreamWriter(@"..\..\..\output\Html.txt"))
            {
                // Load the sentence model once instead of re-reading it from disk for every
                // input line, and always release the underlying Java stream.
                SentenceDetector    detector;
                java.io.InputStream modelIn = new java.io.FileInputStream(@"..\Debug\en-sent.bin");
                try
                {
                    detector = new SentenceDetectorME(new SentenceModel(modelIn));
                }
                finally
                {
                    modelIn.close();
                }

                foreach (string inputFile in inputFiles)
                {
                    using (StreamReader sr = new StreamReader(inputFile))
                    {
                        string line;
                        while ((line = sr.ReadLine()) != null)
                        {
                            foreach (string sent in detector.sentDetect(line))
                            {
                                sw.WriteLine(sent);
                            }
                        }
                    }

                    // Push the sentences of each finished file to disk before starting the next one.
                    sw.Flush();
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Splits the input content into individual sentences.
        /// </summary>
        /// <param name="contents">Content to split into sentences</param>
        /// <returns>The sentences returned by a <c>SentenceDetectorME</c> built on <c>Model</c>.</returns>
        public static IEnumerable<string> ExtractSentences(string contents)
        {
            // A new detector is created for every call on top of the existing Model member.
            return new SentenceDetectorME(Model).sentDetect(contents);
        }
        /// <summary>
        /// Trains an English sentence model from the sample file, evaluates it with the SharpNL
        /// detector, then serializes it to a temporary file and evaluates the same model through
        /// the Java OpenNLP detector.
        /// </summary>
        public void TestEverything()
        {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream    = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);
                // Expected value first, so a failure message reports the right sides.
                Assert.AreEqual(true, model.UseTokenEnd);

                var sMe = new SentenceDetectorME(model);

                // test the SharpNL sentences
                SentenceDetectorMETest.EvalSentences(sMe);

                var sFile = Path.GetTempFileName();

                // Dispose the output stream before the file is opened again below, so the
                // serialized model is completely written and the handle is released.
                using (var output = new FileStream(sFile, FileMode.Create)) {
                    model.Serialize(output);
                }

                var jModel2 = new JavaModel(OpenNLP.CreateInputStream(sFile));

                var jMe = new JavaSDME(jModel2);

                // test the Java OpenNLP sentences.
                JavaEvalSentences(jMe);
            }
        }
예제 #5
0
        /// <summary>
        /// Reverses the token order inside every sentence of the message and appends the
        /// detokenized sentences to a single builder.
        /// </summary>
        /// <param name="Message">Text to split into sentences and reverse.</param>
        /// <returns>Builder holding the detokenized, reversed sentences in their original order.</returns>
        private StringBuilder ReverseIt(string Message)
        {
            // Sentence and token models come from the LoadNLP holder.
            var sentenceParser = new SentenceDetectorME(LoadNLP.sentenceModel);
            var tokenizer      = new TokenizerME(LoadNLP.tokenModel);
            var result         = new StringBuilder();

            foreach (string sentence in sentenceParser.sentDetect(Message))
            {
                string[] tokens = tokenizer.tokenize(sentence);

                // Swap tokens pairwise, walking inwards from both ends of the array.
                for (int left = 0, right = tokens.Length - 1; left < right; left++, right--)
                {
                    string held = tokens[left];
                    tokens[left]  = tokens[right];
                    tokens[right] = held;
                }

                // Turn the reversed tokens back into text.
                result.Append(DeTokenize(tokens, DetokenizationDictionary.Operation.MOVE_LEFT));
            }

            return result;
        }
예제 #6
0
        /// <summary>
        /// Trains a sentence detector model from the sample stream and writes it to the
        /// configured model file.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // No parameter file was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else if (TrainUtil.isSequenceTraining(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }

            Jfile modelOutFile = @params.Model;
            CmdLineUtil.checkOutputFile("sentence detector model", modelOutFile);

            char[] eos = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            SentenceModel model;

            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                SentenceDetectorFactory factory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eos);

                model = SentenceDetectorME.train(@params.Lang, sampleStream, factory, mlParams);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing the sample stream is best effort; a failure here is ignored on purpose.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            CmdLineUtil.writeModel("sentence detector", modelOutFile, model);
        }
예제 #7
0
        /// <summary>
        /// Loads the sentence detector from Resources\en-sent.bin the first time it is called;
        /// later calls return immediately.
        /// </summary>
        private void LoadSentenceDetector()
        {
            if (alreadyLoadSentenceDetector)
            {
                return;
            }

            java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
            try
            {
                sentenceDetector = new SentenceDetectorME(new SentenceModel(modelInpStream));
            }
            finally
            {
                // Release the file handle whether or not the model could be read.
                modelInpStream.close();
            }

            alreadyLoadSentenceDetector = true;
        }
예제 #8
0
        /// <summary>
        /// Detects the sentence spans of a paragraph using the en-sent.bin model.
        /// </summary>
        /// <param name="paragraph">Text to analyse.</param>
        /// <returns>The spans returned by <c>sentPosDetect</c> for the paragraph.</returns>
        public Span[] SentPosDetect(string paragraph)
        {
            var bin = GetFileStream("en-sent.bin");
            try
            {
                SentenceModel      model     = new SentenceModel(bin);
                SentenceDetectorME sdetector = new SentenceDetectorME(model);

                return sdetector.sentPosDetect(paragraph);
            }
            finally
            {
                // Close the model stream even when loading or detection throws.
                bin.close();
            }
        }
        /// <summary>
        /// Trains a sentence model from the plain-text training file at the given path
        /// (100 iterations, cutoff 0).
        /// </summary>
        /// <param name="path">Path of the training data file, read line by line.</param>
        /// <returns>The trained sentence model.</returns>
        public static SentenceModel TrainModel(string path)
        {
            // The file is only needed while training runs, so dispose it afterwards
            // instead of leaving the handle open.
            using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                TrainingParameters trainParams = new TrainingParameters();

                trainParams.Set(Parameters.Iterations, "100");
                trainParams.Set(Parameters.Cutoff, "0");

                SentenceDetectorFactory detectorFactory = new SentenceDetectorFactory(TRAINING_LANGUAGE, true, null, null);
                SentenceSampleStream    sampleStream    = new SentenceSampleStream(new PlainTextByLineStream(fs));

                return SentenceDetectorME.Train(TRAINING_LANGUAGE, sampleStream, detectorFactory, trainParams);
            }
        }
예제 #10
0
        /// <summary>
        /// Splits every line of the source file into sentences and writes one sentence per
        /// line to a sibling file whose name ends in "_sent.txt".
        /// </summary>
        /// <param name="src">Path of the text file to split.</param>
        static void Sent(string src)
        {
            // Both files are disposed on exit; without that the writer's buffered tail
            // was never flushed to the output file.
            using (StreamReader input = new StreamReader(src))
            using (StreamWriter output = new StreamWriter(Regex.Replace(src, "(.*).txt", "$1_sent.txt")))
            {
                // Load the model once rather than once per input line, and close its stream.
                SentenceDetector detector;
                InputStream      modelIn = new FileInputStream(@"..\..\..\en-sent.bin");
                try
                {
                    detector = new SentenceDetectorME(new SentenceModel(modelIn));
                }
                finally
                {
                    modelIn.close();
                }

                string line;
                while ((line = input.ReadLine()) != null)
                {
                    foreach (string sent in detector.sentDetect(line))
                    {
                        output.WriteLine(sent);
                    }
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Trains a Dutch sentence model with an abbreviation dictionary on a small sample
        /// and checks that detection on the same sample yields the eight original lines.
        /// </summary>
        public void AbbreviationDefaultBehaviorTest()
        {
            // The empty entry reproduces the blank line between the two groups of sentences.
            var sampleLines = new[]
            {
                "Test E-mail met zowel letsel als 12. Toedracht in het onderwerp.",
                "Dit is een 2e regel met een tel. 011-4441444 erin.",
                "Dit is een 2e regel.",
                "Dit is een 2e regel.",
                "",
                "Dit is een 2e regel met een tel. 033-1333123 erin!",
                "Test E-mail met zowel winst als 12. toedracht in het onderwerp.",
                "Dit is een 2e regel!",
                "Dit is een 2e regel."
            };
            var samples = string.Join(Environment.NewLine, sampleLines) + Environment.NewLine;

            // Entries handed to the factory as its abbreviation dictionary.
            var abbreviations = new SharpNL.Dictionary.Dictionary(false)
            {
                { "12. Toedracht" },
                { "Tel." },
            };

            var parameters = new TrainingParameters();

            parameters.Set(Parameters.Algorithm, "MAXENT");
            parameters.Set(Parameters.TrainerType, "Event");
            parameters.Set(Parameters.Iterations, "100");
            parameters.Set(Parameters.Cutoff, "5");

            char[] endOfSentenceChars = { '.', '?', '!' };
            var    factory            = new SentenceDetectorFactory("nl", true, abbreviations, endOfSentenceChars);
            var    sampleStream       = new SentenceSampleStream(new PlainTextByLineStream(new StringReader(samples)));

            var trainedModel = SentenceDetectorME.Train("nl", sampleStream, factory, parameters);
            var detector     = new SentenceDetectorME(trainedModel);

            var detected = detector.SentDetect(samples);
            var expected = samples.Split(new [] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

            Assert.AreEqual(8, detected.Length);

            var index = 0;
            while (index < detected.Length)
            {
                Assert.AreEqual(expected[index], detected[index]);
                index++;
            }
        }
예제 #12
0
        /// <summary>
        /// Trains an English sentence model from the sample file and runs the shared
        /// sentence evaluation against a detector built on it.
        /// </summary>
        public void TestSentenceDetector()
        {
            using (var file = Tests.OpenFile("/opennlp/tools/sentdetect/Sentences.txt")) {
                var mlParams = new TrainingParameters();

                mlParams.Set(Parameters.Iterations, "100");
                mlParams.Set(Parameters.Cutoff, "0");

                var sdFactory = new SentenceDetectorFactory("en", true, null, null);
                var stream    = new SentenceSampleStream(new PlainTextByLineStream(file));

                var model = SentenceDetectorME.Train("en", stream, sdFactory, mlParams);

                Assert.AreEqual("en", model.Language);
                // Expected value first, so a failure message reports the right sides.
                Assert.AreEqual(true, model.UseTokenEnd);

                EvalSentences(new SentenceDetectorME(model));
            }
        }
예제 #13
0
파일: OpenNLP.cs 프로젝트: baio/d-mill
        /// <summary>
        /// Reads uspe-1.txt, normalises its whitespace, splits it into sentences and writes
        /// one sentence per line to uspe-sentenced.txt.
        /// </summary>
        public void SplitSentences()
        {
            var txt = File.ReadAllText(@"c:\dev\d-mill\uspe\Data\uspe-1.txt");

            // Collapse every whitespace run, line breaks included, into a single space.
            // No "\r\n" can remain afterwards, so no separate line-break pass is needed.
            txt = Regex.Replace(txt, "\\s+", " ");

            // Glue "MR." to the following word. The dot is escaped so that only a literal
            // "MR." matches; unescaped it also rewrote text such as "MRS ".
            txt = Regex.Replace(txt, "MR\\.\\s+", "MR.");

            SentenceModel model;
            var modelStream = new java.io.FileInputStream("../../Models/en-sent.bin");
            try
            {
                model = new SentenceModel(modelStream);
            }
            finally
            {
                // Release the model file handle whether or not loading succeeded.
                modelStream.close();
            }

            var detector = new SentenceDetectorME(model);

            var sentences = detector.sentDetect(txt);

            File.WriteAllLines(@"c:\dev\d-mill\uspe\Data\uspe-sentenced.txt", sentences);
        }
예제 #14
0
        /// <summary>
        /// Splits every file in the Dataset folder into sentences and writes them, one per
        /// line, next to the input with the last three characters of the name replaced by "rtf".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            DirectoryInfo folder = new DirectoryInfo(@"..\..\..\..\..\Dataset");

            FileInfo[] inputFiles = folder.GetFiles();
            if (inputFiles.Length == 0)
            {
                // Nothing to process, so the model is not needed either.
                return;
            }

            // Load the model once for all files and always close its stream.
            SentenceDetector    detector;
            java.io.InputStream modelIn = new java.io.FileInputStream(@"..\..\..\..\..\en-sent.bin");
            try
            {
                detector = new SentenceDetectorME(new SentenceModel(modelIn));
            }
            finally
            {
                modelIn.close();
            }

            foreach (var fname in inputFiles)
            {
                string   text  = File.ReadAllText(fname.FullName);
                string[] sents = detector.sentDetect(text);

                // Swap only the trailing three characters. String.Replace would rewrite every
                // occurrence of that text anywhere in the path (e.g. a folder called "txt").
                string fullName   = fname.FullName;
                string outputPath = fullName.Substring(0, fullName.Length - 3) + "rtf";

                using (StreamWriter sw = new StreamWriter(outputPath))
                {
                    foreach (var sent in sents)
                    {
                        sw.WriteLine(sent);
                    }
                }
            }
        }
예제 #15
0
        /// <summary>
        /// Performs sentence detection on the paragraphs read from the input stream and
        /// prints each detected sentence on its own line, with an empty line after every paragraph.
        ///
        /// A newline will be treated as a paragraph boundary.
        /// </summary>
        /// <param name="args">Must hold exactly one element, the model file; otherwise the help text is printed.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            SentenceModel      model    = (new SentenceModelLoader()).load(new File(args[0]));
            SentenceDetectorME detector = new SentenceDetectorME(model);

            ObjectStream <string> paragraphs = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                string paragraph;
                while ((paragraph = paragraphs.read()) != null)
                {
                    string[] detected = detector.sentDetect(paragraph);
                    foreach (string sentence in detected)
                    {
                        Console.WriteLine(sentence);
                    }

                    // Count the sentences of this paragraph for the final report.
                    monitor.incrementCounter(detected.Length);

                    Console.WriteLine();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
예제 #16
0
        /// <summary>
        /// Loads the sentence detector, tokenizer, POS tagger, chunker and parser models from
        /// the Resources folder and fills the stop-word collection from english.stop.txt.
        /// Every model stream is closed as soon as its model has been read.
        /// </summary>
        public NLP()
        {
            java.io.FileInputStream modelInpStream;

            //loading sentence detector model
            modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
            try
            {
                sentenceDetector = new SentenceDetectorME(new SentenceModel(modelInpStream));
            }
            finally
            {
                modelInpStream.close();
            }

            //loading tokenizer model
            modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
            try
            {
                tokenizer = new TokenizerME(new TokenizerModel(modelInpStream));
            }
            finally
            {
                modelInpStream.close();
            }

            //loading POS tagger model
            modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
            try
            {
                tagger = new POSTaggerME(new POSModel(modelInpStream));
            }
            finally
            {
                modelInpStream.close();
            }

            //loading chunker model
            modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
            try
            {
                chunker = new ChunkerME(new ChunkerModel(modelInpStream));
            }
            finally
            {
                modelInpStream.close();
            }

            //loading parser model
            modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
            try
            {
                parser = ParserFactory.create(new ParserModel(modelInpStream));
            }
            finally
            {
                modelInpStream.close();
            }

            //loading stop words list; each word is stored both stemmed and as written
            using (StreamReader sr = new StreamReader("Resources\\english.stop.txt"))
            {
                string line;

                while ((line = sr.ReadLine()) != null)
                {
                    stopwords.Add(Stemming(line));
                    stopwords.Add(line);
                }
            }
        }
예제 #17
0
        /// <summary>
        /// Splits every line of ..\..\Data\data.txt into sentences and writes each sentence,
        /// followed by an empty line, to ..\..\Data\result.txt.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            using (StreamWriter sw = new StreamWriter(@"..\..\Data\result.txt"))
            using (StreamReader sr = new StreamReader(@"..\..\Data\data.txt"))
            {
                // Load the model once instead of once per input line, and close its stream.
                SentenceDetector    detector;
                java.io.InputStream modelIn = new java.io.FileInputStream("en-sent.bin");
                try
                {
                    detector = new SentenceDetectorME(new SentenceModel(modelIn));
                }
                finally
                {
                    modelIn.close();
                }

                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    foreach (var sent in detector.sentDetect(line))
                    {
                        sw.WriteLine(sent);
                        sw.WriteLine();
                    }

                    // Keep the result file current after every processed input line.
                    sw.Flush();
                }
            }
        }
        /// <summary>
        /// For every file in the input folder: splits each line into sentences, tokenizes the
        /// concatenated sentences and writes the tokens, separated by spaces, as one line of a
        /// file with the same name in the answer folder.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load both models once and close each stream as soon as its model is read.
            SentenceDetector    detector;
            java.io.InputStream modelIn = new java.io.FileInputStream("en-sent.bin");
            try
            {
                detector = new SentenceDetectorME(new SentenceModel(modelIn));
            }
            finally
            {
                modelIn.close();
            }

            TokenizerME         mE;
            java.io.InputStream modelIn2 = new java.io.FileInputStream("en-token.bin");
            try
            {
                mE = new TokenizerME(new TokenizerModel(modelIn2));
            }
            finally
            {
                modelIn2.close();
            }

            string folderName   = @"C:\Users\Administrator\Desktop\lab-6-opennlp-ju-zi-qie-fen-10411174\file";
            string answerFolder = @"C:\Users\Administrator\Desktop\lab-6-opennlp-ju-zi-qie-fen-10411174\answer";

            foreach (string fname in System.IO.Directory.GetFiles(folderName))
            {
                // Take the file name from the path itself rather than from a fixed segment
                // index, which only worked for this exact folder depth.
                string outputPath = Path.Combine(answerFolder, Path.GetFileName(fname));

                using (StreamWriter sw = new StreamWriter(outputPath))
                using (StreamReader file2 = new StreamReader(fname))
                {
                    string line;
                    while ((line = file2.ReadLine()) != null)
                    {
                        string[] sents = detector.sentDetect(line);
                        if (sents.Length == 0)
                        {
                            continue;
                        }

                        // Sentences are joined without a separator before tokenizing.
                        string joined = string.Concat(sents);

                        foreach (var token in mE.tokenize(joined))
                        {
                            sw.Write(token + " ");
                        }
                        sw.WriteLine();
                    }
                }
            }
        }
 /// <summary>
 /// Trains an "en" sentence model from the sample stream using the given factory and
 /// the default training parameters.
 /// </summary>
 /// <param name="factory">Factory passed to the trainer.</param>
 /// <returns>The model returned by <c>SentenceDetectorME.Train</c>.</returns>
 private static SentenceModel Train(SentenceDetectorFactory factory)
 {
     var samples    = CreateSampleStream();
     var parameters = TrainingParameters.DefaultParameters();

     return SentenceDetectorME.Train("en", samples, factory, parameters);
 }
예제 #20
0
        // Constructors and finalizers:
        /// <summary>
        /// Builds the folder and path fields and then loads, in order: the WordNet engine, the
        /// OpenNLP sentence detector, tokenizer, name finder, POS tagger and chunker, the OpenNLP
        /// parser (only when <c>_loadParser</c> is set), the Stanford parser and the Porter stemmer.
        /// Progress messages are written to the console while the OpenNLP and WordNet parts load.
        /// </summary>
        private Repository()
        {
            // Keep the part of the full assembly name that precedes the first comma.
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            // Each literal has its backslashes replaced by Dsc (presumably the directory
            // separator - confirm). The literals below contain only forward slashes.
            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            // Local text folder: three levels above the current assembly directory, then "data".
            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..",                        "..","..", "data"
                //"..", "..", "text"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            // (Each model stream below is closed right after its model has been constructed.)
            Console.Write("Loading OpenNLP sentence detector.... ");
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
            _sentenceModel = new SentenceModel(modelInputStream);
            modelInputStream.close();
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
            _tokenizerModel  = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
            modelInputStream.close();
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            modelInputStream      = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
            _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
            modelInputStream.close();
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
            _posModel        = new POSModel(modelInputStream);
            modelInputStream.close();
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
            _chunkerModel    = new ChunkerModel(modelInputStream);
            modelInputStream.close();
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser:
            // Loaded only when _loadParser is true.
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
                _parserModel     = new ParserModel(modelInputStream);
                modelInputStream.close();
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser:
            //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }
 /// <summary>
 /// Creates a detector on top of an existing sentence model.
 /// </summary>
 /// <param name="model">Model handed to the underlying <c>SentenceDetectorME</c>.</param>
 public SentenceDetector(SentenceModel model)
 {
     var engine = new SentenceDetectorME(model);

     detector = engine;
 }
 /// <summary>
 /// Creates a detector from a model trained on the file located at the current
 /// directory followed by <c>TRAINING_MODEL_PATH</c>.
 /// </summary>
 public SentenceDetector()
 {
     var trainedModel = TrainModel(Environment.CurrentDirectory + TRAINING_MODEL_PATH);

     detector = new SentenceDetectorME(trainedModel);
 }
        /// <summary>
        /// Creates a detector from a sentence model read from the given stream.
        /// </summary>
        /// <param name="modelStream">Stream passed to the <c>SentenceModel</c> constructor.</param>
        public SentenceDetector(FileStream modelStream)
        {
            detector = new SentenceDetectorME(new SentenceModel(modelStream));
        }
예제 #24
0
 /// <summary>
 /// Creates an instance without a sentence splitter; <c>sentenceSplitter</c> is set to null.
 /// </summary>
 public NLPSentenceDetectorOp()
 {
     this.sentenceSplitter = null;
 }
예제 #25
0
 /// <summary>
 /// Creates an instance whose sentence splitter is a <c>SentenceDetectorME</c> built on the given model.
 /// </summary>
 /// <param name="model">Sentence model handed to the splitter.</param>
 public NLPSentenceDetectorOp(SentenceModel model)
 {
     this.sentenceSplitter = new SentenceDetectorME(model);
 }
예제 #26
0
        /// <summary>
        /// Lists the sentences detected in the file at <c>FilePath</c>, numbered from 1, and
        /// returns the one whose number the user types on the console.
        /// </summary>
        /// <returns>
        /// The chosen sentence, or an empty string when anything fails (missing model or file,
        /// non-numeric input, number out of range); the failure message is printed to the console.
        /// </returns>
        public String chooseSentenceMenu()
        {
            string userChoosenSentence = "";
            System.Console.WriteLine("Choose a sentence to use from your current text");
            System.Console.WriteLine("Must be a space between each sentence");
            try
            {
                SentenceModel model;
                java.io.InputStream modelIn = new FileInputStream("C:\\Users\\jcoleman\\Documents\\Capstone\\jcoleman_Capstone\\Code\\NEWCHATBOT\\ConsoleBot\\ConsoleBot\\en-sent.bin");
                try
                {
                    model = new SentenceModel(modelIn);
                }
                finally
                {
                    // Release the model file before waiting for user input, and also
                    // when loading fails.
                    modelIn.close();
                }

                SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);

                // Lines are joined without any separator, hence the "space between each
                // sentence" hint printed above.
                FileText = System.IO.File.ReadAllLines(FilePath);
                string text = string.Concat(FileText);

                string[] sentences = sentenceDetector.sentDetect(text);

                for (int s = 0; s < sentences.Length; s++)
                {
                    System.Console.WriteLine((s + 1) + " : " + sentences[s]);
                }

                string userInput       = System.Console.ReadLine();
                int    userInputNumber = int.Parse(userInput);

                // The menu is 1-based, the array 0-based.
                userChoosenSentence = sentences[userInputNumber - 1];
            }
            catch (Exception e)
            {
                // Best effort: report the problem and fall through to the empty result.
                System.Console.WriteLine(e.Message);
            }

            return userChoosenSentence;
        }
예제 #27
0
        /// <summary>
        /// Runs a fixed set of sample texts through the given detector and asserts the detected
        /// sentences, the number of sentence probabilities and, for one sample, the sentence spans.
        /// </summary>
        /// <param name="sentDetect">Detector under test.</param>
        internal static void EvalSentences(SentenceDetectorME sentDetect)
        {
            // Two sentences, both terminated by a period.
            const string sampleSentences1 = "This is a test. There are many tests, this is the second.";
            var          sents            = sentDetect.SentDetect(sampleSentences1);

            Assert.AreEqual(sents.Length, 2);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "There are many tests, this is the second.");
            // Probabilities are read right after each detection call; presumably they
            // describe the most recent call - confirm against the detector implementation.
            var probs = sentDetect.GetSentenceProbabilities();

            Assert.AreEqual(probs.Length, 2);

            // Second sentence has no terminating period.
            const string sampleSentences2 = "This is a test. There are many tests, this is the second";

            sents = sentDetect.SentDetect(sampleSentences2);
            Assert.AreEqual(sents.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "There are many tests, this is the second");

            // Quoted text, including a period inside the closing quote.
            const string sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\"";

            sents = sentDetect.SentDetect(sampleSentences3);
            Assert.AreEqual(sents.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(sents[0], "This is a \"test\".");
            Assert.AreEqual(sents[1], "He said \"There are many tests, this is the second.\"");

            // Three sentences, the last one ending with a question mark.
            const string sampleSentences4 = "This is a \"test\". I said \"This is a test.\"  Any questions?";

            sents = sentDetect.SentDetect(sampleSentences4);
            Assert.AreEqual(sents.Length, 3);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 3);
            Assert.AreEqual(sents[0], "This is a \"test\".");
            Assert.AreEqual(sents[1], "I said \"This is a test.\"");
            Assert.AreEqual(sents[2], "Any questions?");

            // Trailing whitespace must not be part of the detected sentence.
            const string sampleSentences5 = "This is a one sentence test space at the end.    ";

            sents = sentDetect.SentDetect(sampleSentences5);
            Assert.AreEqual(1, sentDetect.GetSentenceProbabilities().Length);
            Assert.AreEqual(sents[0], "This is a one sentence test space at the end.");

            const string sampleSentences6 = "This is a one sentences test with tab at the end.            ";

            sents = sentDetect.SentDetect(sampleSentences6);
            Assert.AreEqual(sents[0], "This is a one sentences test with tab at the end.");

            // Several spaces between two sentences.
            const string sampleSentences7 = "This is a test.    With spaces between the two sentences.";

            sents = sentDetect.SentDetect(sampleSentences7);
            Assert.AreEqual(sents[0], "This is a test.");
            Assert.AreEqual(sents[1], "With spaces between the two sentences.");

            // Empty input yields no sentences.
            const string sampleSentences9 = "";

            sents = sentDetect.SentDetect(sampleSentences9);
            Assert.AreEqual(0, sents.Length);

            const string sampleSentences10 = "               "; // whitespaces and tabs

            sents = sentDetect.SentDetect(sampleSentences10);
            Assert.AreEqual(0, sents.Length);

            // No terminating period, followed by trailing spaces.
            const string sampleSentences11 = "This is test sentence without a dot at the end and spaces          ";

            sents = sentDetect.SentDetect(sampleSentences11);
            Assert.AreEqual(sents[0], "This is test sentence without a dot at the end and spaces");
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(1, probs.Length);

            // Leading whitespace must not be part of the detected sentence.
            const string sampleSentence12 = "    This is a test.";

            sents = sentDetect.SentDetect(sampleSentence12);
            Assert.AreEqual(sents[0], "This is a test.");

            const string sampleSentence13 = " This is a test";

            sents = sentDetect.SentDetect(sampleSentence13);
            Assert.AreEqual(sents[0], "This is a test");

            // Test that sentPosDetect also works
            var pos = sentDetect.SentPosDetect(sampleSentences2);

            Assert.AreEqual(pos.Length, 2);
            probs = sentDetect.GetSentenceProbabilities();
            Assert.AreEqual(probs.Length, 2);
            Assert.AreEqual(new Span(0, 15), pos[0]);
            Assert.AreEqual(new Span(16, 56), pos[1]);
        }
예제 #28
0
        /// <summary>
        /// Splits the text into sentences using the sentence model stored in <c>Resource.en_sent</c>.
        /// </summary>
        /// <param name="Text">Text to split.</param>
        /// <returns>The sentences returned by <c>sentDetect</c>.</returns>
        public static string[] SplitSentences(string Text)
        {
            // The model is rebuilt from the resource bytes on every call.
            var model = new SentenceModel(new java.io.ByteArrayInputStream(Resource.en_sent));

            return new SentenceDetectorME(model).sentDetect(Text);
        }