public static void PrintSentences(SentenceSegmenter segmenter, IEnumerable<string> paragraphs)
{
    foreach (var paragraph in paragraphs)
    {
        PrintSentences(segmenter, paragraph);
    }
}
public static void EvaluateSbd(SentenceSegmenter segmenter)
{
    var taggedParagraphs = File.ReadAllLines(TaggedInput);
    var evaluations = segmenter.Evaluate(taggedParagraphs);
    SentenceSegmenterEvaluator.GetTotalReport(evaluations, printFalseAlarms: true);
}
public static void PrintSentences(SentenceSegmenter segmenter, string paragraph)
{
    var sentences = segmenter.GetSentences(paragraph);
    foreach (var sentence in sentences)
    {
        Console.WriteLine(sentence);
    }
}
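// Usage sketch (illustrative, not part of the original file): given an already
// constructed SentenceSegmenter, print the sentences of one paragraph. The method
// name DemoPrintSentences and the sample Persian text are assumptions for the example.
public static void DemoPrintSentences(SentenceSegmenter segmenter)
{
    // Two short sample sentences; any Persian paragraph would do.
    PrintSentences(segmenter, "این جمله اول است. این جمله دوم است!");
}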
public static List<string> SplitUsingUpssalaSentSegmenter(string str, int chunkSize)
{
    // Since the Hazm web server cannot handle large texts, this method first splits
    // the text using the ParsPer sentence splitter (which can handle big texts) and
    // then returns the segments for further processing, such as Hazm normalization
    // and tokenization.
    string[] sentences = SentenceSegmenter.GetSegments(str);

    var temp = new List<string>();
    for (int i = 0; i < sentences.Length; i++)
    {
        if (sentences[i].Length >= chunkSize)
        {
            // Break very long sentences in half so they can fit into a chunk.
            int breakPoint = sentences[i].Length / 2;
            temp.Add(sentences[i].Substring(0, breakPoint));
            temp.Add(sentences[i].Substring(breakPoint));
        }
        else
        {
            temp.Add(sentences[i]);
        }
    }
    sentences = temp.ToArray();

    var segments = new List<string>();
    string currentSegment = "";
    int sentenceCounter = 0;
    while (true)
    {
        if (sentenceCounter == sentences.Length)
        {
            // The final piece of text is still in currentSegment; don't forget to add it.
            segments.Add(currentSegment);
            break;
        }
        if (currentSegment.Length + sentences[sentenceCounter].Length <= chunkSize)
        {
            currentSegment += sentences[sentenceCounter] + " ";
            sentenceCounter++;
        }
        else if (currentSegment.Length == 0)
        {
            // A single half-sentence can still exceed chunkSize; emit it as its own
            // segment to avoid looping forever on an empty currentSegment.
            segments.Add(sentences[sentenceCounter]);
            sentenceCounter++;
        }
        else
        {
            segments.Add(currentSegment);
            currentSegment = "";
        }
    }
    return segments;
}
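// Usage sketch (illustrative, not from the original source): splitting a long text
// into server-friendly chunks before sending them to the Hazm web server. The
// DemoChunking method name and the 1000-character chunk size are assumptions.
public static void DemoChunking(string longText)
{
    const int chunkSize = 1000; // assumed limit the Hazm web server can handle
    List<string> chunks = SplitUsingUpssalaSentSegmenter(longText, chunkSize);
    foreach (var chunk in chunks)
    {
        // Each chunk is at most ~chunkSize characters and ends on a sentence
        // (or half-sentence) boundary, so it is safe to process downstream.
        Console.WriteLine($"[{chunk.Length} chars] {chunk}");
    }
}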