/// <summary>
/// Generates <c>_iterations</c> random break-position candidates for <paramref name="targetText"/>,
/// rates each one, and returns the break positions of the candidate that sorts first by
/// ascending <c>Fitness</c>.
/// </summary>
/// <param name="sourceChunksTextLength">Text lengths of the source chunks; stored in <c>_sourceChunksTextLength</c>.</param>
/// <param name="targetText">Text that is split with <c>StringSplitEngine.Split</c> and for which break positions are generated.</param>
/// <returns>The <c>BreakPositions</c> of the candidate with the lowest-sorting <c>Fitness</c>.</returns>
/// <exception cref="System.InvalidOperationException">No candidate was generated because the loop body never ran.</exception>
public override int[] GetSplitPositions(List<int> sourceChunksTextLength, string targetText)
{
    // Store the inputs and the derived split data in the instance fields.
    _sourceChunksTextLength = sourceChunksTextLength;
    _targetSplits = StringSplitEngine.Split(targetText);
    _targetSplitsLength = _targetSplits.ConvertAll(split => split.Length);

    var candidates = new List<RateResult>();
    int attempt = 0;
    while (attempt < _iterations)
    {
        int[] candidatePositions = GetRandomBreakPositions(targetText, _sourceChunksTextLength);
        candidates.Add(CalculateRateResult(_sourceChunksTextLength, targetText.Length, candidatePositions));
        attempt++;
    }

    // OrderBy is a stable sort, so among equal Fitness values the earliest generated candidate wins.
    return candidates.OrderBy(candidate => candidate.Fitness).First().BreakPositions;
}
/// <summary>
/// Lazily combines the sentence chunks of the given paragraphs into <see cref="Sentence"/> objects.
/// </summary>
/// <remarks>
/// The current sentence is emitted and a new one started when:
/// <list type="bullet">
/// <item><description>a paragraph's number is not the previous paragraph's number plus one (and the current sentence has text),</description></item>
/// <item><description>a chunk starts with a character contained in <c>SentenceDelimiterBeforeChars</c> (and the current sentence has text),</description></item>
/// <item><description>a chunk ends with a character contained in <c>SentenceDelimiterAfterChars</c>.</description></item>
/// </list>
/// Zero-length chunks are skipped: they have no first or last character to inspect, and
/// indexing them would throw. This method is an iterator, so no work is done until the
/// result is enumerated.
/// </remarks>
/// <param name="paragraphs">Paragraphs to process, in the order they should be concatenated.</param>
/// <returns>A lazily evaluated sequence of the assembled sentences.</returns>
private IEnumerable<Sentence> ConcatSentences(List<Paragraph> paragraphs)
{
    Sentence currentSentence = new Sentence();
    int lastParagraphNumber = int.MinValue;

    foreach (var paragraph in paragraphs)
    {
        // A gap in the paragraph numbering means the paragraphs do not belong together, so the
        // sentence built so far is closed. The text check avoids emitting an empty sentence.
        if (lastParagraphNumber + 1 != paragraph.Number && currentSentence.Text.Length > 0)
        {
            yield return currentSentence;
            currentSentence = new Sentence();
        }

        lastParagraphNumber = paragraph.Number;
        var paragraphWrapper = new ParagraphWrapper(paragraph);
        var sentenceChunks = SentenceSplitEngine.Split(paragraph.Text);

        foreach (var sentenceChunk in sentenceChunks)
        {
            // An empty chunk contributes no text and has no first/last character;
            // without this guard the index accesses below would throw.
            if (sentenceChunk.Length == 0)
            {
                continue;
            }

            // A leading delimiter closes the sentence that was being built before this chunk.
            if (currentSentence.Text.Length > 0 && SentenceDelimiterBeforeChars.Contains(sentenceChunk[0]))
            {
                yield return currentSentence;
                currentSentence = new Sentence();
            }

            currentSentence.SentenceParagraphs.Add(new SentenceParagraphRelation(sentenceChunk, paragraphWrapper));

            // A trailing delimiter closes the sentence including this chunk.
            if (SentenceDelimiterAfterChars.Contains(sentenceChunk[sentenceChunk.Length - 1]))
            {
                yield return currentSentence;
                currentSentence = new Sentence();
            }
        }
    }

    // Avoids an empty last sentence, which could happen when the last chunk ends with a delimiter.
    if (currentSentence.Text.Length > 0)
    {
        yield return currentSentence;
    }
}