/// <summary>
/// Expands a token consisting of digits into (word) Items in the WordRelation.
/// Tokens whose "nsw" feature is "nide" are expanded with
/// <c>NumberExpander.ExpandId</c>; otherwise the result of
/// <c>_cart.Interpret</c> selects ordinal, digit-by-digit, year (expanded with
/// <c>ExpandId</c>) or plain number expansion.
/// </summary>
/// <param name="tokenVal">The digit string.</param>
private void DigitsToWords(string tokenVal)
{
    FeatureSet features = _tokenItem.Features;
    string nsw = features.IsPresent("nsw") ? features.GetString("nsw") : "";

    if (nsw.Equals("nide"))
    {
        NumberExpander.ExpandId(tokenVal, _wordRelation);
        return;
    }

    // _cart.Interpret is handed the token item itself, so when tokenVal is not
    // the item's current "name" the feature is swapped in for the duration of
    // the call and put back afterwards.
    string savedName = features.GetString("name");
    bool swapName = !tokenVal.Equals(savedName);

    if (swapName)
    {
        features.SetString("name", tokenVal);
    }

    string digitsType = (string)_cart.Interpret(_tokenItem);

    if (swapName)
    {
        features.SetString("name", savedName);
    }

    if (digitsType.Equals("ordinal"))
    {
        NumberExpander.ExpandOrdinal(tokenVal, _wordRelation);
    }
    else if (digitsType.Equals("digits"))
    {
        NumberExpander.ExpandDigits(tokenVal, _wordRelation);
    }
    else if (digitsType.Equals("year"))
    {
        NumberExpander.ExpandId(tokenVal, _wordRelation);
    }
    else
    {
        // Any other classification is read as an ordinary number.
        NumberExpander.ExpandNumber(tokenVal, _wordRelation);
    }
}
/// <summary>
/// Convert the given string (which does not only consist of alphabet) into
/// (word) Items in the WordRelation. The string is cut after the first index
/// for which <c>IsTextSplitable</c> returns true and both pieces are passed
/// back to <c>TokenToWords</c>; if no such index exists the whole string is
/// added as a single lower-cased word.
/// </summary>
/// <param name="tokenVal">The string.</param>
private void NotJustAlphasToWords(string tokenVal)
{
    /* its not just alphas */
    int tokenLength = tokenVal.Length;

    // Find the first position at which the text may be split; the last
    // character is never tested.
    int index = 0;
    while (index < tokenLength - 1 && !IsTextSplitable(tokenVal, index))
    {
        index++;
    }

    if (index == tokenLength - 1)
    {
        // No split point. Lower-case with the invariant culture so the result
        // does not depend on the current thread culture (for example the
        // Turkish dotless i produced by a culture-sensitive ToLower()).
        _wordRelation.AddWord(tokenVal.ToLowerInvariant());
        return;
    }

    // NOTE(review): JSubString appears to take (start, end) like Java's
    // substring rather than (start, length) - confirm against its definition.
    string head = tokenVal.JSubString(0, index + 1);
    string tail = tokenVal.JSubString(index + 1, tokenLength);

    // Mark the token "nide" before expanding the pieces; DigitsToWords reads
    // this feature to pick ExpandId for digit runs.
    _tokenItem.Features.SetString("nsw", "nide");
    TokenToWords(head);
    TokenToWords(tail);
}
/// <summary>
/// Expands a dashed string (e.g. "aaa-bbb") into (word) Items in the
/// WordRelation. The string is split at its first '-'. When both halves match
/// <c>DigitsPattern</c> the result is read as a range ("aaa to bbb");
/// otherwise the two halves are simply expanded one after the other.
/// </summary>
/// <param name="tokenVal">The dashed string.</param>
private void DashToWords([In] string tokenVal)
{
    int dashPos = tokenVal.IndexOf('-');
    string left = tokenVal.JSubString(0, dashPos);
    string right = tokenVal.JSubString(dashPos + 1, tokenVal.Length);

    bool isNumericRange = Matches(DigitsPattern, left) && Matches(DigitsPattern, right);
    if (!isNumericRange)
    {
        TokenToWords(left);
        TokenToWords(right);
        return;
    }

    // For a numeric range the token's "name" feature is pointed at each half
    // while that half is expanded. Note that it is left as the empty string
    // afterwards, not restored to its previous value.
    FeatureSet features = _tokenItem.Features;

    features.SetString("name", left);
    TokenToWords(left);

    _wordRelation.AddWord("to");

    features.SetString("name", right);
    TokenToWords(right);

    features.SetString("name", "");
}
/// <summary>
/// Converts the given string containing "St" and "Dr" to (word) Items in the
/// WordRelation. A string starting with 's'/'S' is read as either "street" or
/// "saint"; anything else as "drive" or "doctor". The choice is made from the
/// token's punctuation, the first characters of the neighbouring token names
/// and the whitespace that follows. A "punc" feature equal to "." is cleared
/// afterwards.
/// </summary>
/// <param name="drStString">The string with "St" and "Dr".</param>
private void DrStToWords(string drStString)
{
    char c0 = drStString[0];
    bool isSt = c0 == 's' || c0 == 'S';
    string street = isSt ? "street" : "drive";
    string saint = isSt ? "saint" : "doctor";

    FeatureSet featureSet = _tokenItem.Features;
    string punctuation = featureSet.GetString("punc");
    string featPunctuation = (string)_tokenItem.FindFeature("punc");

    // punctuation is treated as nullable at the end of this method, so it is
    // null-checked here as well instead of being dereferenced blindly.
    if (_tokenItem.GetNext() == null
        || (punctuation != null && punctuation.IndexOf(',') != -1))
    {
        // Last token, or followed by a comma.
        _wordRelation.AddWord(street);
    }
    else if (featPunctuation.Equals(","))
    {
        _wordRelation.AddWord(saint);
    }
    else
    {
        string pName = (string)_tokenItem.FindFeature("p.name");
        string nName = (string)_tokenItem.FindFeature("n.name");

        // A neighbour's name may be empty (the "name" feature is writable and
        // can hold ""). Use '\0' in that case: it is neither upper, lower nor
        // digit, so the decision falls through to the whitespace check rather
        // than throwing on the index.
        char p0 = string.IsNullOrEmpty(pName) ? '\0' : pName[0];
        char n0 = string.IsNullOrEmpty(nName) ? '\0' : nName[0];

        if ((char.IsUpper(p0) || char.IsDigit(p0)) && char.IsLower(n0))
        {
            // Capitalised word or number before, lower-case word after.
            _wordRelation.AddWord(street);
        }
        else if (char.IsLower(p0) && char.IsUpper(n0))
        {
            // Lower-case word before, capitalised word after.
            _wordRelation.AddWord(saint);
        }
        else
        {
            // No usable case evidence: decide on the whitespace before the
            // next token.
            string whitespace = (string)_tokenItem.FindFeature("n.whitespace");
            if (whitespace.Equals(" "))
            {
                _wordRelation.AddWord(saint);
            }
            else
            {
                _wordRelation.AddWord(street);
            }
        }
    }

    // A lone "." in "punc" is cleared once the word has been added.
    if (punctuation != null && punctuation.Equals("."))
    {
        featureSet.SetString("punc", "");
    }
}
/// <summary>
/// Converts the given Token into (word) Items in the WordRelation.
/// The token text is tested against a fixed sequence of patterns and the first
/// one that applies decides the expansion, so the order of the branches below
/// is significant.
/// </summary>
/// <param name="tokenVal">the string value of the token, which may or may not be
/// same as the one in called "name" in flite</param>
private void TokenToWords(string tokenVal)
{
    FeatureSet tokenFeatures = _tokenItem.Features;
    string itemName = tokenFeatures.GetString("name");
    int tokenLength = tokenVal.Length;

    if (tokenFeatures.IsPresent("phones"))
    {
        // A token carrying a "phones" feature is added verbatim.
        _wordRelation.AddWord(tokenVal);
    }
    else if ((tokenVal.Equals("a") || tokenVal.Equals("A"))
        && ((_tokenItem.GetNext() == null)
            || !(tokenVal.Equals(itemName))
            || !(((string)_tokenItem.FindFeature("punc")).Equals(""))))
    {
        /* if A is a sub part of a token, then its ey not ah */
        _wordRelation.AddWord("_a");
    }
    else if (Matches(AlphabetPattern, tokenVal))
    {
        if (Matches(RomanNumbersPattern, tokenVal))
        {
            /* XVIII */
            RomanToWords(tokenVal);
        }
        else if (Matches(IllionPattern, tokenVal)
            && Matches(UsMoneyPattern, (string)_tokenItem.FindFeature("p.name")))
        {
            /* $ X -illion */
            _wordRelation.AddWord(tokenVal);
            _wordRelation.AddWord("dollars");
        }
        else if (Matches(DrStPattern, tokenVal))
        {
            /* St Andrew's St, Dr King Dr */
            DrStToWords(tokenVal);
        }
        else if (tokenVal.Equals("Mr"))
        {
            _tokenItem.Features.SetString("punc", "");
            _wordRelation.AddWord("mister");
        }
        else if (tokenVal.Equals("Mrs"))
        {
            _tokenItem.Features.SetString("punc", "");
            _wordRelation.AddWord("missus");
        }
        else if (IsSingleCapitalBeforeCapital(tokenVal))
        {
            AddSingleCapitalLetter(tokenVal, tokenFeatures);
        }
        else if (IsStateName(tokenVal))
        {
            /*
             * The name of a US state isStateName() has already added the
             * full name of the state, so we're all set.
             */
        }
        else if (tokenLength > 1 && !IsPronounceable(tokenVal))
        {
            /* Need common exception list */
            /* unpronouncable list of alphas */
            NumberExpander.ExpandLetters(tokenVal, _wordRelation);
        }
        else
        {
            /* just a word */
            // Invariant lower-casing keeps the result independent of the
            // current thread culture.
            _wordRelation.AddWord(tokenVal.ToLowerInvariant());
        }
    }
    else if (Matches(DottedAbbrevPattern, tokenVal))
    {
        /* U.S.A. */
        // remove all dots
        NumberExpander.ExpandLetters(tokenVal.Replace(".", ""), _wordRelation);
    }
    else if (Matches(CommaIntPattern, tokenVal))
    {
        /* 99,999,999 */
        NumberExpander.ExpandReal(tokenVal.Replace(",", "").Replace("'", ""), _wordRelation);
    }
    else if (Matches(SevenPhoneNumberPattern, tokenVal))
    {
        /* 234-3434 telephone numbers */
        int dashIndex = tokenVal.IndexOf('-');
        string aaa = tokenVal.JSubString(0, dashIndex);
        string bbb = tokenVal.Substring(dashIndex + 1);

        NumberExpander.ExpandDigits(aaa, _wordRelation);
        _wordRelation.AddBreak();
        NumberExpander.ExpandDigits(bbb, _wordRelation);
    }
    else if (MatchesPartPhoneNumber(tokenVal))
    {
        /* part of a telephone number */
        var punctuation = (string)_tokenItem.FindFeature("punc");
        if (punctuation.Equals(""))
        {
            _tokenItem.Features.SetString("punc", ",");
        }

        NumberExpander.ExpandDigits(tokenVal, _wordRelation);
        _wordRelation.AddBreak();
    }
    else if (Matches(NumberTimePattern, tokenVal))
    {
        /* 12:35 */
        int colonIndex = tokenVal.IndexOf(':');
        string aaa = tokenVal.JSubString(0, colonIndex);
        string bbb = tokenVal.Substring(colonIndex + 1);

        NumberExpander.ExpandNumber(aaa, _wordRelation);
        if (!(bbb.Equals("00")))
        {
            NumberExpander.ExpandId(bbb, _wordRelation);
        }
    }
    else if (Matches(Digits2DashPattern, tokenVal))
    {
        /* 999-999-999 */
        DigitsDashToWords(tokenVal);
    }
    else if (Matches(DigitsPattern, tokenVal))
    {
        DigitsToWords(tokenVal);
    }
    else if (IsSingleCapitalBeforeCapital(tokenVal))
    {
        // Same handling as in the alphabetic branch above, for single
        // upper-case characters that AlphabetPattern did not accept.
        AddSingleCapitalLetter(tokenVal, tokenFeatures);
    }
    else if (Matches(DoublePattern, tokenVal))
    {
        NumberExpander.ExpandReal(tokenVal, _wordRelation);
    }
    else if (Matches(OrdinalPattern, tokenVal))
    {
        /* explicit ordinals */
        // Drop the two-character suffix before expanding.
        string aaa = tokenVal.JSubString(0, tokenLength - 2);
        NumberExpander.ExpandOrdinal(aaa, _wordRelation);
    }
    else if (Matches(UsMoneyPattern, tokenVal))
    {
        /* US money */
        UsMoneyToWords(tokenVal);
    }
    else if (tokenLength > 0 && tokenVal[tokenLength - 1] == '%')
    {
        /* Y% */
        TokenToWords(tokenVal.JSubString(0, tokenLength - 1));
        _wordRelation.AddWord("percent");
    }
    else if (Matches(NumessPattern, tokenVal))
    {
        // Drop the final character before expanding.
        NumberExpander.ExpandNumess(tokenVal.JSubString(0, tokenLength - 1), _wordRelation);
    }
    else if (Matches(DigitsSlashDigitsPattern, tokenVal) && tokenVal.Equals(itemName))
    {
        DigitsSlashDigitsToWords(tokenVal);
    }
    else if (tokenVal.Equals("-"))
    {
        // Skip it.
        // This test must come before the generic dash split below: a lone "-"
        // contains a dash, so testing it afterwards would never be reached and
        // the token would be split into two empty strings instead.
    }
    else if (tokenVal.IndexOf('-') != -1)
    {
        DashToWords(tokenVal);
    }
    else if (tokenLength > 1 && !Matches(AlphabetPattern, tokenVal))
    {
        NotJustAlphasToWords(tokenVal);
    }
    else if (tokenVal.Equals("&"))
    {
        // &
        _wordRelation.AddWord("and");
    }
    else
    {
        // Just a word.
        _wordRelation.AddWord(tokenVal.ToLowerInvariant());
    }
}

/// <summary>
/// Tests whether <paramref name="tokenVal"/> is a single upper-case character
/// that is followed by exactly one space and a token whose name starts with an
/// upper-case character.
/// </summary>
/// <param name="tokenVal">The token text being expanded.</param>
/// <returns>true when all of the above conditions hold.</returns>
private bool IsSingleCapitalBeforeCapital(string tokenVal)
{
    return tokenVal.Length == 1
        && char.IsUpper(tokenVal[0])
        && ((string)_tokenItem.FindFeature("n.whitespace")).Equals(" ")
        && char.IsUpper(((string)_tokenItem.FindFeature("n.name"))[0]);
}

/// <summary>
/// Clears the "punc" feature and adds the lower-cased letter as a word, using
/// "_a" in place of "a".
/// </summary>
/// <param name="tokenVal">The single-letter token text.</param>
/// <param name="tokenFeatures">The feature set whose "punc" feature is cleared.</param>
private void AddSingleCapitalLetter(string tokenVal, FeatureSet tokenFeatures)
{
    tokenFeatures.SetString("punc", "");

    string letter = tokenVal.ToLowerInvariant();
    _wordRelation.AddWord(letter.Equals("a") ? "_a" : letter);
}