/// <summary>
/// Runs <paramref name="text"/> through the language's pre-segmentation replacers and
/// rules, in a fixed order, and hands the rewritten text to <c>InternalSegmenter.Segment</c>.
/// </summary>
/// <param name="text">The raw text to segment.</param>
/// <param name="language">Supplies the replacers and rules applied before segmentation.</param>
/// <returns>The segments produced by <c>InternalSegmenter.Segment</c>.</returns>
public static IReadOnlyList <string> Process(string text, ILanguage language)
        {
            // Each stage consumes the output of the previous one, so the order below is significant.
            string afterListItems = ListItemReplacer.AddLineBreak(text);
            string afterAbbreviations = language.AbbreviationReplacer.Replace(afterListItems);
            string afterNumbers = language.NumberRules.Apply(afterAbbreviations);
            string afterContinuousPunctuation = ReplaceContinuousPunctuation(afterNumbers, language);
            string afterPeriodsAndEmail = language.WithMultiplePeriodsAndEmailRule.Apply(afterContinuousPunctuation);
            string afterGeoLocation = language.GeoLocationRule.Apply(afterPeriodsAndEmail);
            string prepared = language.FileFormatRule.Apply(afterGeoLocation);

            return InternalSegmenter.Segment(prepared, language);
        }
Example #2
0
        /// <summary>
        /// Applies the punctuation-dependent rules to <paramref name="text"/> and splits it with
        /// <c>SplitUsingSentenceBoundaryPunctuation</c>. Text that contains none of the language's
        /// punctuation marks is returned unchanged as a single-element list.
        /// </summary>
        /// <param name="text">The text to inspect and split.</param>
        /// <param name="language">Supplies the punctuation list and the rules applied here.</param>
        /// <returns>
        /// A single-element list holding <paramref name="text"/> when it contains no punctuation;
        /// otherwise the result of <c>SplitUsingSentenceBoundaryPunctuation</c>.
        /// </returns>
        private static IReadOnlyList <string> CheckForPunctuation(string text, ILanguage language)
        {
            var containsPunctuation = false;
            var endsWithPunctuation = false;

            var punctuations = language.Punctuations;
            for (var i = 0; i < punctuations.Count; i++)
            {
                var punctuation = punctuations[i];

                if (text.IndexOf(punctuation, StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                containsPunctuation = true;

                // Test the tail directly. Comparing the index of the FIRST occurrence with the
                // last position reports "does not end with punctuation" whenever the same mark
                // also appears earlier in the text (e.g. "Hello. World.").
                if (text.EndsWith(punctuation, StringComparison.OrdinalIgnoreCase))
                {
                    endsWithPunctuation = true;

                    // Both flags are now final; no need to examine the remaining marks.
                    break;
                }
            }

            if (!containsPunctuation)
            {
                return new[] { text };
            }

            if (!endsWithPunctuation)
            {
                // NOTE(review): "ȸ" presumably acts as an end-of-text marker for the splitter
                // below - confirm against SplitUsingSentenceBoundaryPunctuation.
                text += "ȸ";
            }

            text = ExclamationWords.Apply(text);
            text = language.BetweenPunctuationReplacer.Replace(text);
            text = language.DoublePunctuationRules.Apply(text);
            text = language.QuestionMarkInQuotationRule.Apply(text);
            text = language.ExclamationMarkRules.Apply(text);

            text = ListItemReplacer.ReplaceParentheses(text);

            return SplitUsingSentenceBoundaryPunctuation(text, language);
        }