/// <summary>
/// Runs <paramref name="text"/> through a fixed sequence of pre-processing steps and then
/// hands the prepared text to <c>InternalSegmenter.Segment</c>.
/// </summary>
/// <param name="text">The text to segment.</param>
/// <param name="language">Supplies the language-specific replacers and rules applied to the text.</param>
/// <returns>The segments produced by <c>InternalSegmenter.Segment</c>.</returns>
public static IReadOnlyList<string> Process(string text, ILanguage language)
{
    // Every stage consumes the output of the previous one, so the order below is significant.
    string withLineBreaks = ListItemReplacer.AddLineBreak(text);
    string withAbbreviations = language.AbbreviationReplacer.Replace(withLineBreaks);
    string withNumbers = language.NumberRules.Apply(withAbbreviations);
    string withContinuousPunctuation = ReplaceContinuousPunctuation(withNumbers, language);
    string withPeriodsAndEmails = language.WithMultiplePeriodsAndEmailRule.Apply(withContinuousPunctuation);
    string withGeoLocations = language.GeoLocationRule.Apply(withPeriodsAndEmails);
    string prepared = language.FileFormatRule.Apply(withGeoLocations);

    return InternalSegmenter.Segment(prepared, language);
}
/// <summary>
/// Splits <paramref name="text"/> on sentence-boundary punctuation when it contains any of
/// the language's punctuation marks; otherwise returns the text unchanged as a single segment.
/// </summary>
/// <param name="text">The text to inspect and split.</param>
/// <param name="language">Supplies the punctuation list and the language-specific rules.</param>
/// <returns>
/// A one-element list holding <paramref name="text"/> when no punctuation mark is present,
/// otherwise the result of <c>SplitUsingSentenceBoundaryPunctuation</c>.
/// </returns>
private static IReadOnlyList<string> CheckForPunctuation(string text, ILanguage language)
{
    var containsPunctuation = false;
    var endsWithPunctuation = false;

    for (var i = 0; i < language.Punctuations.Count; i++)
    {
        var punctuation = language.Punctuations[i];

        if (text.IndexOf(punctuation, StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        containsPunctuation = true;

        // Test the tail of the text directly. Comparing the FIRST occurrence's index with the
        // last position (as was done before) misses text such as "Hello. World.", where the
        // mark also appears earlier, and wrongly treats it as not ending with punctuation.
        if (text.EndsWith(punctuation, StringComparison.OrdinalIgnoreCase))
        {
            endsWithPunctuation = true;

            // Both flags are settled; the remaining marks cannot change the outcome.
            break;
        }
    }

    if (!containsPunctuation)
    {
        return new[] { text };
    }

    if (!endsWithPunctuation)
    {
        // NOTE(review): "ȸ" looks like an end-of-text marker consumed by the later splitting
        // step; confirm against SplitUsingSentenceBoundaryPunctuation.
        text += "ȸ";
    }

    // The rules below are applied in this exact order; each one works on the previous result.
    text = ExclamationWords.Apply(text);
    text = language.BetweenPunctuationReplacer.Replace(text);
    text = language.DoublePunctuationRules.Apply(text);
    text = language.QuestionMarkInQuotationRule.Apply(text);
    text = language.ExclamationMarkRules.Apply(text);
    text = ListItemReplacer.ReplaceParentheses(text);

    return SplitUsingSentenceBoundaryPunctuation(text, language);
}