Ejemplo n.º 1
0
 /// <summary>
 /// Walks <paramref name="paragraphs"/> in enumeration order and passes each element,
 /// together with <paramref name="segmenter"/>, to the <c>PrintSentences</c> overload
 /// that accepts a single paragraph string.
 /// </summary>
 /// <param name="segmenter">Segmenter forwarded unchanged with every paragraph.</param>
 /// <param name="paragraphs">Sequence of paragraphs to process.</param>
 public static void PrintSentences(SentenceSegmenter segmenter, IEnumerable<string> paragraphs)
 {
     // Explicit enumerator form of a foreach loop; the using block disposes the enumerator.
     using (IEnumerator<string> cursor = paragraphs.GetEnumerator())
     {
         while (cursor.MoveNext())
         {
             PrintSentences(segmenter, cursor.Current);
         }
     }
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads all lines of the file at <c>TaggedInput</c>, passes them to
        /// <c>segmenter.Evaluate</c>, and hands the result to
        /// <c>SentenceSegmenterEvaluator.GetTotalReport</c> with
        /// <c>printFalseAlarms</c> set to <c>true</c>.
        /// </summary>
        /// <param name="segmenter">Segmenter whose <c>Evaluate</c> method is run on the file's lines.</param>
        public static void EvaluateSbd(SentenceSegmenter segmenter)
        {
            string[] taggedLines = File.ReadAllLines(TaggedInput);

            SentenceSegmenterEvaluator.GetTotalReport(
                segmenter.Evaluate(taggedLines),
                printFalseAlarms: true);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Calls <c>segmenter.GetSentences</c> on <paramref name="paragraph"/> and writes each
        /// returned item to the console on its own line.
        /// </summary>
        /// <param name="segmenter">Segmenter used to obtain the sentences.</param>
        /// <param name="paragraph">Text passed to <c>GetSentences</c>.</param>
        public static void PrintSentences(SentenceSegmenter segmenter, string paragraph)
        {
            foreach (var line in segmenter.GetSentences(paragraph))
            {
                Console.WriteLine(line);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Splits <paramref name="str"/> into segments made of consecutive sentences, each
        /// segment targeting at most <paramref name="chunkSize"/> characters.
        /// </summary>
        /// <remarks>
        /// The Hazm web server cannot handle large texts, so the text is first split with the
        /// ParsPer sentence splitter (which can handle large texts); the resulting segments are
        /// returned for further processing such as Hazm normalization and tokenization.
        /// <para>
        /// Every sentence is followed by a single space inside its segment. The fit check does
        /// not count the space of the sentence being added, so a segment can be one character
        /// longer than <paramref name="chunkSize"/>.
        /// </para>
        /// <para>
        /// A sentence whose length is at least <paramref name="chunkSize"/> is cut in half, and
        /// halves that are still too long are halved again. A piece that cannot be made to fit
        /// (only possible when <paramref name="chunkSize"/> is smaller than 1) is emitted as a
        /// segment of its own instead of stalling the packing loop.
        /// </para>
        /// </remarks>
        /// <param name="str">Text handed to <c>SentenceSegmenter.GetSegments</c>.</param>
        /// <param name="chunkSize">Target maximum number of characters per segment.</param>
        /// <returns>
        /// The segments in text order. The list always holds at least one element; when the
        /// segmenter yields no sentences it holds a single empty string.
        /// </returns>
        public static List<string> SplitUsingUpssalaSentSegmenter(string str, int chunkSize)
        {
            string[] sentences = SentenceSegmenter.GetSegments(str);

            // Step 1: break up very large sentences so that every piece can be packed.
            List<string> pieces = new List<string>(sentences.Length);
            foreach (string sentence in sentences)
            {
                if (sentence.Length >= chunkSize)
                {
                    SplitOversizedSentence(sentence, chunkSize, pieces);
                }
                else
                {
                    pieces.Add(sentence);
                }
            }

            // Step 2: greedily pack the pieces into segments.
            List<string> segments = new List<string>();
            System.Text.StringBuilder currentSegment = new System.Text.StringBuilder();

            foreach (string piece in pieces)
            {
                // Flush only a non-empty segment: flushing an empty one makes no room for the
                // piece and would emit empty segments without ever advancing.
                if (currentSegment.Length > 0 && currentSegment.Length + piece.Length > chunkSize)
                {
                    segments.Add(currentSegment.ToString());
                    currentSegment.Clear();
                }

                currentSegment.Append(piece).Append(' ');
            }

            // The final piece of text is still in currentSegment; do not forget to add it.
            segments.Add(currentSegment.ToString());

            return segments;
        }

        /// <summary>
        /// Cuts <paramref name="sentence"/> in half and appends the halves to
        /// <paramref name="pieces"/> in text order, halving again any half that is still longer
        /// than <paramref name="chunkSize"/> and can still be split.
        /// </summary>
        private static void SplitOversizedSentence(string sentence, int chunkSize, List<string> pieces)
        {
            int breakPoint = sentence.Length / 2;
            string[] halves = { sentence.Substring(0, breakPoint), sentence.Substring(breakPoint) };

            foreach (string half in halves)
            {
                // A half of length 0 or 1 cannot shrink further, so stop there to guarantee
                // termination even for chunkSize values below 1.
                if (half.Length > chunkSize && half.Length > 1)
                {
                    SplitOversizedSentence(half, chunkSize, pieces);
                }
                else
                {
                    pieces.Add(half);
                }
            }
        }