Пример #1
0
        /// <summary>
        /// Generates <c>_iterations</c> random break-position candidates for the target text,
        /// rates each one, and returns the break positions of the candidate with the lowest
        /// <c>Fitness</c> value.
        /// </summary>
        /// <param name="sourceChunksTextLength">Text lengths of the source chunks; stored in a field and passed on to the candidate generator and the rating step.</param>
        /// <param name="targetText">Target text to be split; it is also tokenised into <c>_targetSplits</c>.</param>
        /// <returns>The <c>BreakPositions</c> of the best-rated (lowest fitness) candidate.</returns>
        /// <exception cref="InvalidOperationException">No candidate was generated (the loop body never ran).</exception>
        public override int[] GetSplitPositions(List <int> sourceChunksTextLength, string targetText)
        {
            // Cache the inputs in instance state before any candidate is generated.
            _sourceChunksTextLength = sourceChunksTextLength;
            _targetSplits           = StringSplitEngine.Split(targetText);
            _targetSplitsLength     = _targetSplits.ConvertAll(split => split.Length);

            var candidates = new List <RateResult>();

            int attempt = 0;
            while (attempt < _iterations)
            {
                int[] candidatePositions = GetRandomBreakPositions(targetText, _sourceChunksTextLength);

                candidates.Add(CalculateRateResult(_sourceChunksTextLength, targetText.Length, candidatePositions));
                attempt++;
            }

            // Ascending order puts the smallest fitness first; the ordering is stable, so the
            // earliest generated candidate wins when several share the same fitness.
            var best = candidates.OrderBy(candidate => candidate.Fitness).First();
            return best.BreakPositions;
        }
Пример #2
0
        /// <summary>
        /// Lazily assembles sentences from the sentence chunks of the given paragraphs.
        /// A sentence is emitted when a gap in paragraph numbering is met, when a chunk starts
        /// with one of <c>SentenceDelimiterBeforeChars</c>, or when a chunk ends with one of
        /// <c>SentenceDelimiterAfterChars</c>. A sentence may therefore span several
        /// consecutively numbered paragraphs.
        /// </summary>
        /// <param name="paragraphs">Paragraphs to process, in the order they should be concatenated.</param>
        /// <returns>A deferred sequence of sentences; none of them has an empty <c>Text</c> at the boundary checks below.</returns>
        private IEnumerable <Sentence> ConcatSentences(List <Paragraph> paragraphs)
        {
            var pendingSentence = new Sentence();
            int previousNumber  = int.MinValue;

            foreach (var para in paragraphs)
            {
                // A jump in paragraph numbering ends the sentence collected so far
                // (only if it actually holds text, so no empty sentence is emitted).
                if (previousNumber + 1 != para.Number && pendingSentence.Text.Length > 0)
                {
                    yield return pendingSentence;

                    pendingSentence = new Sentence();
                }

                previousNumber = para.Number;
                var wrapper = new ParagraphWrapper(para);

                foreach (var chunk in SentenceSplitEngine.Split(para.Text))
                {
                    // A chunk that opens with a "before" delimiter starts a new sentence,
                    // so the pending one is flushed first when it is non-empty.
                    if (pendingSentence.Text.Length > 0)
                    {
                        if (SentenceDelimiterBeforeChars.Contains(chunk[0]))
                        {
                            yield return pendingSentence;

                            pendingSentence = new Sentence();
                        }
                    }

                    var relation = new SentenceParagraphRelation(chunk, wrapper);
                    pendingSentence.SentenceParagraphs.Add(relation);

                    // A chunk that closes with an "after" delimiter completes the sentence.
                    var trailingChar = chunk[chunk.Length - 1];
                    if (SentenceDelimiterAfterChars.Contains(trailingChar))
                    {
                        yield return pendingSentence;

                        pendingSentence = new Sentence();
                    }
                }
            }

            // Emit whatever is left; the sentence is empty when the last chunk ended with a delimiter.
            if (pendingSentence.Text.Length > 0)
            {
                yield return pendingSentence;
            }
        }