Пример #1
0
        /// <summary>
        /// Looks the token up in the US state map and, when it should be read as
        /// a state, appends the state's expansion words to the WordRelation.
        /// </summary>
        /// <remarks>
        /// A map entry whose second element is "ambiguous" is only expanded when
        /// the neighbouring tokens make a state reading plausible; every other
        /// entry is expanded unconditionally. Elements from index 2 onward are the
        /// words that get added.
        /// </remarks>
        /// <param name="tokenVal">The token string.</param>
        /// <returns>true if the token was expanded as a state name; otherwise false.</returns>
        private bool IsStateName([In] string tokenVal)
        {
            string[] entry = UsStatesMap.Get(tokenVal);
            if (entry == null)
            {
                return false;
            }

            // Some state abbreviations double as ordinary English words, so
            // they need supporting context before being expanded.
            if (entry[1].Equals("ambiguous"))
            {
                string before = (string)_tokenItem.FindFeature("p.name");
                string after  = (string)_tokenItem.FindFeature("n.name");

                int        afterLength = after.Length;
                FeatureSet features    = _tokenItem.Features;

                // The preceding word must look like a city: capitalized, at
                // least three characters, purely alphabetic, followed by a comma.
                bool cityBefore = false;
                if (char.IsUpper(before[0]) &&
                    before.Length > 2 &&
                    Matches(AlphabetPattern, before))
                {
                    cityBefore = _tokenItem.FindFeature("p.punc").Equals(",");
                }

                // What follows must be compatible with a state name: a
                // lower-case word, no following token at all, a period in this
                // token's "punc" feature, or a 5- or 10-character digit string
                // (zip code).
                bool plausibleAfter;
                if (char.IsLower(after[0]) || _tokenItem.GetNext() == null)
                {
                    plausibleAfter = true;
                }
                else if (features.GetString("punc").Equals("."))
                {
                    plausibleAfter = true;
                }
                else
                {
                    plausibleAfter = (afterLength == 5 || afterLength == 10) &&
                                     Matches(DigitsPattern, after);
                }

                if (!(cityBefore && plausibleAfter))
                {
                    return false;
                }
            }

            // Slots 0 and 1 are not spoken; the expansion words start at slot 2.
            int index = 2;
            while (index < entry.Length)
            {
                string word = entry[index];
                if (word != null)
                {
                    _wordRelation.AddWord(word);
                }
                index++;
            }

            return true;
        }
Пример #2
0
        /// <summary>
        /// Expands an "St" or "Dr" token into the word it abbreviates and adds
        /// that word to the WordRelation.
        /// </summary>
        /// <remarks>
        /// Tokens starting with 's' or 'S' expand to "street" or "saint"; all
        /// other tokens expand to "drive" or "doctor". The choice between the two
        /// readings is made from the punctuation, capitalization and whitespace of
        /// the neighbouring tokens. If this token's "punc" feature is exactly ".",
        /// it is cleared afterwards so the abbreviation dot is not treated as
        /// sentence punctuation.
        /// </remarks>
        /// <param name="drStString">The string with "St" and "Dr".</param>
        private void DrStToWords(string drStString)
        {
            char c0   = drStString[0];
            bool isSt = c0 == 's' || c0 == 'S';

            // "street"/"drive" is the trailing (address) reading,
            // "saint"/"doctor" the leading (title) reading.
            string street = isSt ? "street" : "drive";
            string saint  = isSt ? "saint" : "doctor";

            FeatureSet featureSet  = _tokenItem.Features;
            string     punctuation = featureSet.GetString("punc");

            // The final check below already treats a null "punc" as possible,
            // so guard the comma test the same way instead of dereferencing it.
            bool hasTrailingComma = punctuation != null && punctuation.IndexOf(',') != -1;

            if (_tokenItem.GetNext() == null || hasTrailingComma)
            {
                // Last token, or followed by a comma: "Main St," / "King Dr,".
                _wordRelation.AddWord(street);
            }
            else if (",".Equals(_tokenItem.FindFeature("p.punc")))
            {
                // The PREVIOUS token ended with a comma: "..., St Andrew".
                // The earlier code re-tested this token's own "punc" here, which
                // can never be "," once the branch above has been skipped
                // (assuming FindFeature("punc") reads the same feature as
                // Features.GetString("punc")), so this branch was dead.
                // NOTE(review): "p.punc" matches my recollection of the Flite
                // rule this was ported from - confirm against upstream.
                _wordRelation.AddWord(saint);
            }
            else
            {
                string pName = (string)_tokenItem.FindFeature("p.name");
                string nName = (string)_tokenItem.FindFeature("n.name");

                char p0 = pName[0];
                char n0 = nName[0];

                if (char.IsLower(n0) && (char.IsUpper(p0) || char.IsDigit(p0)))
                {
                    // Capitalized word or number before, lower-case word after.
                    _wordRelation.AddWord(street);
                }
                else if (char.IsLower(p0) && char.IsUpper(n0))
                {
                    // Lower-case word before, capitalized word after.
                    _wordRelation.AddWord(saint);
                }
                else
                {
                    // No capitalization cue: fall back on the whitespace that
                    // precedes the next token.
                    string whitespace = (string)_tokenItem.FindFeature("n.whitespace");
                    _wordRelation.AddWord(whitespace.Equals(" ") ? saint : street);
                }
            }

            if (punctuation != null && punctuation.Equals("."))
            {
                featureSet.SetString("punc", "");
            }
        }
Пример #3
0
 /// <summary>
 /// Initializes an utterance with empty feature and relation sets and then
 /// hands the tokenizer to <c>setTokenList</c>.
 /// </summary>
 /// <param name="tokenizer">The tokenizer supplying the tokens for this utterance.</param>
 public Utterance(CharTokenizer tokenizer)
 {
     // Both sets must exist before the token list is installed.
     this.features  = new FeatureSet();
     this.relations = new FeatureSet();

     setTokenList(tokenizer);
 }
Пример #4
0
        /// <summary>
        /// Converts the given token into (word) Items in the WordRelation.
        /// </summary>
        /// <remarks>
        /// The token is classified by an ordered chain of tests (letters, dotted
        /// abbreviations, numbers, phone numbers, times, money, percentages, ...)
        /// and the first rule that matches decides how it is expanded, so the
        /// order of the branches is significant. Words are lower-cased with the
        /// invariant culture so the result does not depend on the culture of the
        /// running thread (for example, "I" must not become a dotless "ı" under
        /// a Turkish culture).
        /// </remarks>
        /// <param name="tokenVal">The string value to expand. It may or may not
        /// equal the token item's "name" feature; this method re-enters itself
        /// with a sub-part of the token when stripping a trailing '%'.</param>
        private void TokenToWords(string tokenVal)
        {
            FeatureSet tokenFeatures = _tokenItem.Features;
            string     itemName      = tokenFeatures.GetString("name");
            int        tokenLength   = tokenVal.Length;

            if (tokenFeatures.IsPresent("phones"))
            {
                // Token carries a "phones" feature: pass it through unchanged.
                _wordRelation.AddWord(tokenVal);
            }
            else if ((tokenVal.Equals("a") || tokenVal.Equals("A")) &&
                     (_tokenItem.GetNext() == null ||
                      !tokenVal.Equals(itemName) ||
                      !((string)_tokenItem.FindFeature("punc")).Equals("")))
            {
                // If A is a sub-part of a token, then it is "ey", not "ah".
                _wordRelation.AddWord("_a");
            }
            else if (Matches(AlphabetPattern, tokenVal))
            {
                AlphabeticTokenToWords(tokenVal, tokenFeatures);
            }
            else if (Matches(DottedAbbrevPattern, tokenVal))
            {
                // U.S.A. -> spell the letters with all dots removed.
                NumberExpander.ExpandLetters(tokenVal.Replace(".", ""),
                                             _wordRelation);
            }
            else if (Matches(CommaIntPattern, tokenVal))
            {
                // 99,999,999 -> drop the group separators and read as a number.
                NumberExpander.ExpandReal(tokenVal.Replace(",", "").Replace("'", ""), _wordRelation);
            }
            else if (Matches(SevenPhoneNumberPattern, tokenVal))
            {
                // 234-3434 telephone numbers: digits, break, digits.
                int    dashIndex = tokenVal.IndexOf('-');
                string exchange  = tokenVal.JSubString(0, dashIndex);
                string line      = tokenVal.Substring(dashIndex + 1);

                NumberExpander.ExpandDigits(exchange, _wordRelation);
                _wordRelation.AddBreak();
                NumberExpander.ExpandDigits(line, _wordRelation);
            }
            else if (MatchesPartPhoneNumber(tokenVal))
            {
                // Part of a telephone number: force a comma after it when the
                // token has no punctuation of its own.
                string punctuation = (string)_tokenItem.FindFeature("punc");
                if (punctuation.Equals(""))
                {
                    _tokenItem.Features.SetString("punc", ",");
                }
                NumberExpander.ExpandDigits(tokenVal, _wordRelation);
                _wordRelation.AddBreak();
            }
            else if (Matches(NumberTimePattern, tokenVal))
            {
                // 12:35 -> hours as a number, minutes as an id unless "00".
                int    colonIndex = tokenVal.IndexOf(':');
                string hours      = tokenVal.JSubString(0, colonIndex);
                string minutes    = tokenVal.Substring(colonIndex + 1);

                NumberExpander.ExpandNumber(hours, _wordRelation);
                if (!minutes.Equals("00"))
                {
                    NumberExpander.ExpandId(minutes, _wordRelation);
                }
            }
            else if (Matches(Digits2DashPattern, tokenVal))
            {
                // 999-999-999
                DigitsDashToWords(tokenVal);
            }
            else if (Matches(DigitsPattern, tokenVal))
            {
                DigitsToWords(tokenVal);
            }
            else if (IsInitialBeforeCapitalizedWord(tokenVal))
            {
                InitialToWords(tokenVal, tokenFeatures);
            }
            else if (Matches(DoublePattern, tokenVal))
            {
                NumberExpander.ExpandReal(tokenVal, _wordRelation);
            }
            else if (Matches(OrdinalPattern, tokenVal))
            {
                // Explicit ordinals: strip the two-character suffix.
                string number = tokenVal.JSubString(0, tokenLength - 2);
                NumberExpander.ExpandOrdinal(number, _wordRelation);
            }
            else if (Matches(UsMoneyPattern, tokenVal))
            {
                // US money
                UsMoneyToWords(tokenVal);
            }
            else if (tokenLength > 0 && tokenVal[tokenLength - 1] == '%')
            {
                // Y% -> expand Y recursively, then say "percent".
                TokenToWords(tokenVal.JSubString(0, tokenLength - 1));
                _wordRelation.AddWord("percent");
            }
            else if (Matches(NumessPattern, tokenVal))
            {
                // Drop the final character before expanding.
                NumberExpander.ExpandNumess(tokenVal.JSubString(0, tokenLength - 1), _wordRelation);
            }
            else if (Matches(DigitsSlashDigitsPattern, tokenVal) && tokenVal.Equals(itemName))
            {
                DigitsSlashDigitsToWords(tokenVal);
            }
            else if (tokenVal.IndexOf('-') != -1)
            {
                DashToWords(tokenVal);
            }
            else if (tokenLength > 1 && !Matches(AlphabetPattern, tokenVal))
            {
                NotJustAlphasToWords(tokenVal);
            }
            else if (tokenVal.Equals("&"))
            {
                // &
                _wordRelation.AddWord("and");
            }
            else if (tokenVal.Equals("-"))
            {
                // Skip it
            }
            else
            {
                // Just a word.
                _wordRelation.AddWord(tokenVal.ToLowerInvariant());
            }
        }

        /// <summary>
        /// Expands a token that matched AlphabetPattern: roman numerals,
        /// "-illion" after a money amount, St/Dr, Mr/Mrs, single initials, state
        /// names, unpronounceable letter runs, or a plain word.
        /// </summary>
        /// <param name="tokenVal">The alphabetic token string.</param>
        /// <param name="tokenFeatures">The feature set of the current token item.</param>
        private void AlphabeticTokenToWords(string tokenVal, FeatureSet tokenFeatures)
        {
            if (Matches(RomanNumbersPattern, tokenVal))
            {
                // XVIII
                RomanToWords(tokenVal);
            }
            else if (Matches(IllionPattern, tokenVal) &&
                     Matches(UsMoneyPattern,
                             (string)_tokenItem.FindFeature("p.name")))
            {
                // $ X -illion
                _wordRelation.AddWord(tokenVal);
                _wordRelation.AddWord("dollars");
            }
            else if (Matches(DrStPattern, tokenVal))
            {
                // St Andrew's St, Dr King Dr
                DrStToWords(tokenVal);
            }
            else if (tokenVal.Equals("Mr"))
            {
                _tokenItem.Features.SetString("punc", "");
                _wordRelation.AddWord("mister");
            }
            else if (tokenVal.Equals("Mrs"))
            {
                _tokenItem.Features.SetString("punc", "");
                _wordRelation.AddWord("missus");
            }
            else if (IsInitialBeforeCapitalizedWord(tokenVal))
            {
                InitialToWords(tokenVal, tokenFeatures);
            }
            else if (IsStateName(tokenVal))
            {
                // IsStateName() has already added the full name of the state,
                // so there is nothing left to do.
            }
            else if (tokenVal.Length > 1 && !IsPronounceable(tokenVal))
            {
                // Unpronounceable run of letters: spell it out.
                // (Needs a common exception list.)
                NumberExpander.ExpandLetters(tokenVal, _wordRelation);
            }
            else
            {
                // Just a word.
                _wordRelation.AddWord(tokenVal.ToLowerInvariant());
            }
        }

        /// <summary>
        /// Tells whether the token is a single upper-case character that is
        /// followed, after exactly one space, by a token whose name starts with
        /// an upper-case character.
        /// </summary>
        /// <param name="tokenVal">The token string.</param>
        private bool IsInitialBeforeCapitalizedWord(string tokenVal)
        {
            return tokenVal.Length == 1 &&
                   char.IsUpper(tokenVal[0]) &&
                   ((string)_tokenItem.FindFeature("n.whitespace")).Equals(" ") &&
                   char.IsUpper(((string)_tokenItem.FindFeature("n.name"))[0]);
        }

        /// <summary>
        /// Adds a single-letter token as a lower-cased word ("_a" for the letter
        /// A) and clears the token's "punc" feature.
        /// </summary>
        /// <param name="tokenVal">The single-letter token string.</param>
        /// <param name="tokenFeatures">The feature set of the current token item.</param>
        private void InitialToWords(string tokenVal, FeatureSet tokenFeatures)
        {
            tokenFeatures.SetString("punc", "");

            string letter = tokenVal.ToLowerInvariant();
            _wordRelation.AddWord(letter.Equals("a") ? "_a" : letter);
        }
Пример #5
0
        /// <summary>
        /// Reads the "end" feature of the given segment item as a float.
        /// </summary>
        /// <param name="segment">The segment item whose features are queried.</param>
        /// <returns>The float stored under the "end" feature key.</returns>
        private static float getSegmentEnd(Item segment)
        {
            return segment.getFeatures().getFloat("end");
        }