Example #1
0
        /// <summary>
        /// Expands a digit-only token into (word) Items appended to the WordRelation.
        /// </summary>
        /// <remarks>
        /// When the token's "nsw" feature is "nide" the digits are expanded as an identifier.
        /// Otherwise the CART classifies the token and the result selects the expansion:
        /// "ordinal", "digits", "year" (expanded as an identifier), or a plain number for
        /// anything else.
        /// </remarks>
        /// <param name="tokenVal">The digit string.</param>
        private void digitsToWords(string tokenVal)
        {
            FeatureSet features = tokenItem.getFeatures();
            string     nswKind  = features.isPresent("nsw") ? features.getString("nsw") : "";

            if (nswKind.Equals("nide"))
            {
                NumberExpander.expandID(tokenVal, wordRelation);
                return;
            }

            string originalName   = features.getString("name");
            bool   nameOverridden = !tokenVal.Equals(originalName);

            // Temporarily expose tokenVal as the item's "name" while the CART runs
            // (presumably the CART reads that feature - confirm), then put the
            // original name back.
            if (nameOverridden)
            {
                features.setString("name", tokenVal);
            }

            string kind = (string)cart.interpret(tokenItem);

            if (nameOverridden)
            {
                features.setString("name", originalName);
            }

            if (kind.Equals("ordinal"))
            {
                NumberExpander.expandOrdinal(tokenVal, wordRelation);
            }
            else if (kind.Equals("digits"))
            {
                NumberExpander.expandDigits(tokenVal, wordRelation);
            }
            else if (kind.Equals("year"))
            {
                // Years are read out the same way as identifiers.
                NumberExpander.expandID(tokenVal, wordRelation);
            }
            else
            {
                NumberExpander.expandNumber(tokenVal, wordRelation);
            }
        }
Example #2
0
        /// <summary>
        /// Convert the given string (which does not consist only of alphabetic characters)
        /// into (word) Items in the WordRelation.
        /// </summary>
        /// <remarks>
        /// The string is scanned for the first index at which <c>isTextSplitable</c> reports a
        /// split point. If none is found, the lower-cased string is added as a single word.
        /// Otherwise the token's "nsw" feature is set to "nide" and the two halves are each
        /// passed back to <c>tokenToWords</c>.
        /// </remarks>
        /// <param name="tokenVal">The string.</param>
        private void notJustAlphasToWords(string tokenVal)
        {
            /* its not just alphas */
            int index       = 0;
            int tokenLength = tokenVal.Length;

            // Find the first position after which the text may be split.
            for (; index < tokenLength - 1; index++)
            {
                if (isTextSplitable(tokenVal, index))
                {
                    break;
                }
            }

            // The loop ran off the end: no split point, treat the token as one word.
            if (index == tokenLength - 1)
            {
                wordRelation.addWord(tokenVal.ToLower());
                return;
            }

            string aaa = tokenVal.Substring(0, index + 1);

            // Everything after the split point. The second argument of the two-argument
            // Substring overload is a LENGTH (not an end index), so passing tokenLength
            // there always overran the string and threw ArgumentOutOfRangeException.
            string bbb = tokenVal.Substring(index + 1);

            FeatureSet featureSet = tokenItem.getFeatures();

            featureSet.setString("nsw", "nide");
            tokenToWords(aaa);
            tokenToWords(bbb);
        }
Example #3
0
        /// <summary>
        /// Convert the given dashed string (e.g. "aaa-bbb") into (word) Items in the WordRelation.
        /// </summary>
        /// <remarks>
        /// The string is split at its first '-'. When both parts match the digits pattern the
        /// result is read as a range ("aaa to bbb"), with the token's "name" feature set to each
        /// part while that part is expanded and then set to the empty string. Otherwise the two
        /// parts are simply expanded one after the other.
        /// </remarks>
        /// <param name="tokenVal">The dashed string; callers are expected to pass a value that contains '-'.</param>
        private void dashToWords([In] string tokenVal)
        {
            int    index = tokenVal.IndexOf('-');
            string aaa   = tokenVal.Substring(0, index);

            // Everything after the dash. The second argument of the two-argument Substring
            // overload is a LENGTH (not an end index), so passing tokenVal.Length there
            // always overran the string and threw ArgumentOutOfRangeException.
            string bbb   = tokenVal.Substring(index + 1);

            if (matches(digitsPattern, aaa) && matches(digitsPattern, bbb))
            {
                FeatureSet featureSet = tokenItem.getFeatures();
                featureSet.setString("name", aaa);
                tokenToWords(aaa);
                wordRelation.addWord("to");
                featureSet.setString("name", bbb);
                tokenToWords(bbb);
                featureSet.setString("name", "");
            }
            else
            {
                tokenToWords(aaa);
                tokenToWords(bbb);
            }
        }
Example #4
0
        /// <summary>
        /// Populates this utterance's token relation from the supplied tokens. Note that this
        /// could be optimized by turning the token list directly into the token relation.
        /// </summary>
        /// <remarks>
        /// Tokens whose word is null or empty are skipped. Every other token becomes one item
        /// carrying the "name", "whitespace", "prepunctuation", "punc", "file_pos" and
        /// "line_number" features; the last two are formatted with the invariant culture.
        /// </remarks>
        /// <param name="tokenizer">The tokenList.</param>
        private void setTokenList(IEnumerator <Token> tokenizer)
        {
            Relation tokenRelation = createRelation(Relation.TOKEN);

            while (tokenizer.MoveNext())
            {
                Token  current = tokenizer.Current;
                string word    = current.getWord();

                // Nothing to record for a token without text.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                FeatureSet features = tokenRelation.appendItem().getFeatures();

                features.setString("name", word);
                features.setString("whitespace", current.getWhitespace());
                features.setString("prepunctuation", current.getPrepunctuation());
                features.setString("punc", current.getPostpunctuation());

                string filePosition = current.getPosition().ToString(CultureInfo.InvariantCulture);
                features.setString("file_pos", filePosition);

                string lineNumber = current.getLineNumber().ToString(CultureInfo.InvariantCulture);
                features.setString("line_number", lineNumber);
            }
        }
Example #5
0
 /// <summary>
 /// Stores a string-valued feature under the given name by delegating to the
 /// underlying <c>features</c> object.
 /// </summary>
 /// <param name="name">The name of the feature.</param>
 /// <param name="value">The value of the feature.</param>
 public virtual void setString(string name, string value) => features.setString(name, value);
Example #6
0
        /// <summary>
        /// Expands an "St" or "Dr" token into its spoken word and appends it to the WordRelation.
        /// </summary>
        /// <remarks>
        /// A token beginning with 's' or 'S' is read as either "street" or "saint"; any other
        /// token is read as either "drive" or "doctor". The choice is made from the token's
        /// punctuation, whether a next token exists, the capitalisation of the neighbouring
        /// token names, and the whitespace of the next token. Finally, a "punc" feature equal
        /// to "." is cleared.
        /// </remarks>
        /// <param name="drStString">The string with "St" and "Dr".</param>
        private void drStToWords(string drStString)
        {
            char first = drStString[0];
            bool isSt  = first == 's' || first == 'S';

            string roadWord  = isSt ? "street" : "drive";
            string titleWord = isSt ? "saint" : "doctor";

            FeatureSet features  = tokenItem.getFeatures();
            string     punc      = features.getString("punc");
            string     foundPunc = (string)tokenItem.findFeature("punc");

            string spoken;

            if (tokenItem.getNext() == null || punc.IndexOf(',') != -1)
            {
                // Last token, or followed by a comma: read as the road form.
                spoken = roadWord;
            }
            else if (foundPunc.Equals(","))
            {
                spoken = titleWord;
            }
            else
            {
                string prevName = (string)tokenItem.findFeature("p.name");
                string nextName = (string)tokenItem.findFeature("n.name");

                char prevFirst = prevName[0];
                char nextFirst = nextName[0];

                if ((char.IsUpper(prevFirst) || char.IsDigit(prevFirst)) && char.IsLower(nextFirst))
                {
                    // Capitalised word or number before, lower-case word after.
                    spoken = roadWord;
                }
                else if (char.IsLower(prevFirst) && char.IsUpper(nextFirst))
                {
                    // Lower-case word before, capitalised word after.
                    spoken = titleWord;
                }
                else
                {
                    // Undecided by capitalisation: fall back to the next token's whitespace.
                    string nextWhitespace = (string)tokenItem.findFeature("n.whitespace");
                    spoken = nextWhitespace.Equals(" ") ? titleWord : roadWord;
                }
            }

            wordRelation.addWord(spoken);

            // Clear a "." punctuation feature once the token has been expanded.
            if (punc == ".")
            {
                features.setString("punc", "");
            }
        }
Example #7
0
        /// <summary>
        /// Converts the given Token into (word) Items in the WordRelation.
        /// </summary>
        /// <remarks>
        /// The value is tested against a fixed sequence of cases and only the first matching
        /// case is applied: tokens carrying a "phones" feature, the article "a", purely
        /// alphabetic tokens (roman numerals, "-illion" money amounts, "St"/"Dr", "Mr"/"Mrs",
        /// single capital letters, US state names, unpronounceable letter runs, plain words),
        /// dotted abbreviations, comma-grouped integers, telephone numbers, times, dashed digit
        /// groups, plain digits, reals, explicit ordinals, US money, percentages, plurals of
        /// numbers, digit/digit fractions, a lone "-", dashed strings, mixed alphanumerics and
        /// "&amp;". Anything else is added as a lower-cased word. Several cases call back into
        /// this method with a substring of the token.
        /// </remarks>
        /// <param name="tokenVal">the string value of the token, which may or may not be
        /// same as the one in called "name" in flite</param>
        private void tokenToWords(string tokenVal)
        {
            FeatureSet tokenFeatures = tokenItem.getFeatures();
            string     itemName      = tokenFeatures.getString("name");
            int        tokenLength   = tokenVal.Length;

            if (tokenFeatures.isPresent("phones"))
            {
                // A "phones" feature is present: add the token unchanged.
                wordRelation.addWord(tokenVal);
            }
            else if ((tokenVal.Equals("a") || tokenVal.Equals("A")) &&
                     ((tokenItem.getNext() == null) ||
                      !(tokenVal.Equals(itemName)) || !(((string)tokenItem
                                                         .findFeature("punc")).Equals(""))))
            {
                /* if A is a sub part of a token, then its ey not ah */
                wordRelation.addWord("_a");
            }
            else if (matches(alphabetPattern, tokenVal))
            {
                if (matches(romanNumbersPattern, tokenVal))
                {
                    /* XVIII */
                    romanToWords(tokenVal);
                }
                else if (matches(illionPattern, tokenVal) &&
                         matches(usMoneyPattern,
                                 (string)tokenItem.findFeature("p.name")))
                {
                    /* $ X -illion */
                    wordRelation.addWord(tokenVal);
                    wordRelation.addWord("dollars");
                }
                else if (matches(drStPattern, tokenVal))
                {
                    /* St Andrew's St, Dr King Dr */
                    drStToWords(tokenVal);
                }
                else if (tokenVal.Equals("Mr"))
                {
                    tokenItem.getFeatures().setString("punc", "");
                    wordRelation.addWord("mister");
                }
                else if (tokenVal.Equals("Mrs"))
                {
                    tokenItem.getFeatures().setString("punc", "");
                    wordRelation.addWord("missus");
                }
                else if (tokenLength == 1 &&
                         char.IsUpper(tokenVal[0]) &&
                         ((string)tokenItem.findFeature("n.whitespace"))
                         .Equals(" ") &&
                         char.IsUpper(((string)tokenItem
                                       .findFeature("n.name"))[0]))
                {
                    // Single capital letter followed, after one space, by a capitalised name.
                    tokenFeatures.setString("punc", "");
                    string aaa = tokenVal.ToLower();
                    if (aaa.Equals("a"))
                    {
                        wordRelation.addWord("_a");
                    }
                    else
                    {
                        wordRelation.addWord(aaa);
                    }
                }
                else if (isStateName(tokenVal))
                {
                    /*
                     * The name of a US state isStateName() has already added the
                     * full name of the state, so we're all set.
                     */
                }
                else if (tokenLength > 1 && !isPronounceable(tokenVal))
                {
                    /* Need common exception list */
                    /* unpronouncable list of alphas */
                    NumberExpander.expandLetters(tokenVal, wordRelation);
                }
                else
                {
                    /* just a word */
                    wordRelation.addWord(tokenVal.ToLower());
                }
            }
            else if (matches(dottedAbbrevPattern, tokenVal))
            {
                /* U.S.A. */
                // remove all dots
                NumberExpander.expandLetters(tokenVal.Replace(".", ""),
                                             wordRelation);
            }
            else if (matches(commaIntPattern, tokenVal))
            {
                /* 99,999,999 */
                NumberExpander.expandReal(tokenVal.Replace(",", "").Replace("'", ""), wordRelation);
            }
            else if (matches(sevenPhoneNumberPattern, tokenVal))
            {
                /* 234-3434 telephone numbers */
                int    dashIndex = tokenVal.IndexOf('-');
                string aaa       = tokenVal.Substring(0, dashIndex);
                string bbb       = tokenVal.Substring(dashIndex + 1);

                NumberExpander.expandDigits(aaa, wordRelation);
                wordRelation.addBreak();
                NumberExpander.expandDigits(bbb, wordRelation);
            }
            else if (matchesPartPhoneNumber(tokenVal))
            {
                /* part of a telephone number */
                string punctuation = (string)tokenItem.findFeature("punc");
                if (punctuation.Equals(""))
                {
                    tokenItem.getFeatures().setString("punc", ",");
                }
                NumberExpander.expandDigits(tokenVal, wordRelation);
                wordRelation.addBreak();
            }
            else if (matches(numberTimePattern, tokenVal))
            {
                /* 12:35 */
                int    colonIndex = tokenVal.IndexOf(':');
                string aaa        = tokenVal.Substring(0, colonIndex);
                string bbb        = tokenVal.Substring(colonIndex + 1);

                NumberExpander.expandNumber(aaa, wordRelation);
                if (!(bbb.Equals("00")))
                {
                    NumberExpander.expandID(bbb, wordRelation);
                }
            }
            else if (matches(digits2DashPattern, tokenVal))
            {
                /* 999-999-999 */
                digitsDashToWords(tokenVal);
            }
            else if (matches(digitsPattern, tokenVal))
            {
                digitsToWords(tokenVal);
            }
            else if (tokenLength == 1 &&
                     char.IsUpper(tokenVal[0]) &&
                     ((string)tokenItem.findFeature("n.whitespace"))
                     .Equals(" ") &&
                     char.IsUpper(((string)tokenItem
                                   .findFeature("n.name"))[0]))
            {
                // Same single-capital-letter case as above, for letters that did not
                // match the alphabet pattern.
                tokenFeatures.setString("punc", "");
                string aaa = tokenVal.ToLower();
                if (aaa.Equals("a"))
                {
                    wordRelation.addWord("_a");
                }
                else
                {
                    wordRelation.addWord(aaa);
                }
            }
            else if (matches(doublePattern, tokenVal))
            {
                NumberExpander.expandReal(tokenVal, wordRelation);
            }
            else if (matches(ordinalPattern, tokenVal))
            {
                /* explicit ordinals */
                // Drop the two-character suffix and expand the remaining digits.
                string aaa = tokenVal.Substring(0, tokenLength - 2);
                NumberExpander.expandOrdinal(aaa, wordRelation);
            }
            else if (matches(usMoneyPattern, tokenVal))
            {
                /* US money */
                usMoneyToWords(tokenVal);
            }
            else if (tokenLength > 0 && tokenVal[tokenLength - 1] == '%')
            {
                /* Y% */
                tokenToWords(tokenVal.Substring(0, tokenLength - 1));
                wordRelation.addWord("percent");
            }
            else if (matches(numessPattern, tokenVal))
            {
                // Strip the final character before expanding.
                NumberExpander.expandNumess(tokenVal.Substring(0, tokenLength - 1), wordRelation);
            }
            else if (matches(digitsSlashDigitsPattern, tokenVal) &&
                     tokenVal.Equals(itemName))
            {
                digitsSlashDigitsToWords(tokenVal);
            }
            else if (tokenVal.Equals("-"))
            {
                // Skip it.
                // This test must come before the generic dash case below: a lone "-"
                // also contains '-', so testing it afterwards made this branch
                // unreachable and sent the bare dash to dashToWords.
            }
            else if (tokenVal.IndexOf('-') != -1)
            {
                dashToWords(tokenVal);
            }
            else if (tokenLength > 1 && !matches(alphabetPattern, tokenVal))
            {
                notJustAlphasToWords(tokenVal);
            }
            else if (tokenVal.Equals("&"))
            {
                // &
                wordRelation.addWord("and");
            }
            else
            {
                // Just a word.
                wordRelation.addWord(tokenVal.ToLower());
            }
        }