/// <summary>
/// Lazily enumerates the sentences of <paramref name="text"/>, using a sentence
/// iterator obtained for the given culture.
/// </summary>
/// <param name="text">String to decompose into sentences.</param>
/// <param name="cultureNfo">Culture passed to the sentence iterator factory.</param>
/// <returns>
/// A deferred sequence of sentences; the underlying iterator is created on first
/// enumeration and disposed when enumeration ends or the enumerator is disposed.
/// </returns>
public static IEnumerable<Sentence> Sentences(this string text, CultureInfo cultureNfo)
{
    using (SentenceIterator iterator = GetSentenceIterator(text, cultureNfo))
    {
        while (iterator.MoveNext())
        {
            Sentence current = iterator.Current;
            yield return current;
        }
    }
}
/// <summary>
/// Detects the sentences contained in <paramref name="input"/>.
/// </summary>
/// <remarks>
/// Time expressions, date expressions and abbreviations are stripped from the text
/// (in that order) before sentence detection. Every entry of the token dictionary is
/// then substituted back (key replaced by value) in each detected sentence, and the
/// sentence offsets are corrected by the accumulated length difference.
/// NOTE(review): the dictionary is presumably filled by the token factory during the
/// strip calls, mapping placeholder to original expression - confirm against TokenFactory.
/// </remarks>
/// <param name="input">Text to process.</param>
/// <returns>
/// A deferred sequence of sentences; yields nothing when <paramref name="input"/>
/// is null or empty.
/// </returns>
public IEnumerable<Sentence> Detect(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        yield break;
    }

    Dictionary<string, string> placeholders = new Dictionary<string, string>(100);
    TokenFactory factory = new TokenFactory(placeholders);

    // Strip order matters: time expressions, then date expressions, then abbreviations.
    string stripped = _lang.StripTimeExpression(input, factory);
    stripped = _lang.StripDateExpression(stripped, factory);
    stripped = _lang.StripAbbreviations(stripped, factory);

    // Running difference between positions in the stripped text and positions
    // in the restored text, accumulated over the sentences already emitted.
    int offsetShift = 0;

    using (SentenceIterator iterator = new SentenceIterator(stripped, _lang))
    {
        while (iterator.MoveNext())
        {
            Sentence detected = iterator.Current;
            string strippedSentence = detected.Text;
            int strippedLength = strippedSentence.Length;
            int strippedBegin = detected.Begin;

            // Put every stored expression back in place of its token.
            string restored = strippedSentence;
            foreach (KeyValuePair<string, string> placeholder in placeholders)
            {
                restored = restored.Replace(placeholder.Key, placeholder.Value);
            }

            yield return new Sentence(restored, strippedBegin + offsetShift);

            // Only later sentences are displaced by this sentence's change in length.
            offsetShift += restored.Length - strippedLength;
        }
    }
}