Beispiel #1
0
 /// <summary>
 /// Lazily enumerates the sentences produced for <paramref name="text"/> by the sentence
 /// iterator obtained for <paramref name="cultureNfo"/>.
 /// </summary>
 /// <remarks>
 /// The iterator is created on first enumeration and disposed when enumeration finishes
 /// or the consumer disposes the enumerator early.
 /// </remarks>
 /// <param name="text">String to decompose</param>
 /// <param name="cultureNfo">Culture to use for decomposing</param>
 /// <returns>Sequence of sentences, in the order the iterator reports them</returns>
 public static IEnumerable <Sentence> Sentences(this string text, CultureInfo cultureNfo)
 {
     using (SentenceIterator cursor = GetSentenceIterator(text, cultureNfo))
     {
         // Advance the cursor once per loop turn and hand out whatever it currently points at.
         for (bool hasSentence = cursor.MoveNext(); hasSentence; hasSentence = cursor.MoveNext())
         {
             yield return cursor.Current;
         }
     }
 }
Beispiel #2
0
        /// <summary>
        /// Lazily detects the sentences contained in <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// The input is run through the language's time, date and abbreviation stripping steps
        /// (in that order) before sentence iteration. In every sentence found, each key of the
        /// token dictionary is then replaced by its mapped value, and the sentence's begin offset
        /// is moved by the accumulated length change of the sentences yielded before it.
        /// </remarks>
        /// <param name="input">Text to process; a null or empty string yields an empty sequence</param>
        /// <returns>Sequence of sentences with replaced text and adjusted begin offsets</returns>
        public IEnumerable <Sentence> Detect(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                yield break;
            }

            // NOTE(review): presumably the strip steps register token -> original expression
            // pairs in this dictionary through the factory; confirm against TokenFactory.
            var replacements = new Dictionary<string, string>(100);
            var factory      = new TokenFactory(replacements);

            // Strip order matters: time expressions, then date expressions, then abbreviations.
            string stripped = _lang.StripTimeExpression(input, factory);
            stripped = _lang.StripDateExpression(stripped, factory);
            stripped = _lang.StripAbbreviations(stripped, factory);

            // Running sum of (replaced length - stripped length) over the sentences already yielded.
            int offsetShift = 0;

            using (SentenceIterator cursor = new SentenceIterator(stripped, _lang))
            {
                while (cursor.MoveNext())
                {
                    Sentence found    = cursor.Current;
                    string   original = found.Text;
                    string   restored = original;

                    // Substitute every dictionary key occurring in the sentence by its value.
                    foreach (KeyValuePair<string, string> pair in replacements)
                    {
                        restored = restored.Replace(pair.Key, pair.Value);
                    }

                    // The begin offset only reflects the growth of the sentences before this one.
                    int shiftedBegin = found.Begin + offsetShift;
                    offsetShift += restored.Length - original.Length;

                    yield return new Sentence(restored, shiftedBegin);
                }
            }
        }