/// ------------------------------------------------------------------------------------
public AmbiguousSeq(string unit)
{
    // Stores the unit as the literal and then picks a base character for it.
    Literal = unit;

    // A null or empty unit has nothing to scan, so no base character is assigned.
    if (string.IsNullOrEmpty(unit))
    {
        return;
    }

    // First preference: walking backwards from the end of the unit, take the
    // first character the symbol cache reports as a base character.
    int pos = unit.Length;
    while (--pos >= 0)
    {
        IPASymbol symbol = App.IPASymbolCache[unit[pos]];
        if (symbol != null && symbol.IsBase)
        {
            BaseChar = symbol.Literal;
            return;
        }
    }

    // Fallback: no base character was found. Walking backwards again, take the
    // first character that has no entry in the symbol cache and use it as the
    // base. If every character is known (but none is a base), the base
    // character is left unassigned.
    pos = unit.Length;
    while (--pos >= 0)
    {
        char candidate = unit[pos];
        if (App.IPASymbolCache[candidate] == null)
        {
            BaseChar = candidate.ToString();
            return;
        }
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Works out the base character and character type for the specified phone from
/// the base symbols it contains. Nothing is done when CheckIfAmbiguous returns
/// true for the phone.
/// </summary>
/// ------------------------------------------------------------------------------------
private void InitializeBaseChar(string phone)
{
    if (CheckIfAmbiguous(phone))
    {
        return;
    }

    IPASymbol firstBase = null;
    IPASymbol lastBase = null;
    bool sawConsonant = false;
    bool sawVowel = false;

    foreach (char ch in phone)
    {
        var symbol = App.IPASymbolCache[ch];

        // Only symbols known to the cache and flagged as base symbols matter here.
        if (symbol == null || !symbol.IsBase)
        {
            continue;
        }

        if (symbol.Type == IPASymbolType.consonant)
        {
            sawConsonant = true;
        }
        else if (symbol.Type == IPASymbolType.vowel)
        {
            sawVowel = true;
        }

        // Track the first and last base symbols regardless of their type.
        firstBase = firstBase ?? symbol;
        lastBase = symbol;
    }

    // No consonant or vowel base symbols at all: only fill in the character type
    // (from the first base symbol, if any) when it hasn't been determined yet.
    if (!sawConsonant && !sawVowel)
    {
        if (firstBase != null && CharType == IPASymbolType.notApplicable)
        {
            CharType = firstBase.Type;
        }

        return;
    }

    // When no vowel was seen, the phone is treated as a consonant and its last
    // base symbol supplies the base character. Otherwise the phone is treated
    // as a vowel and its first base symbol supplies the base character.
    bool consonantsOnly = !sawVowel;
    IPASymbol chosen = consonantsOnly ? lastBase : firstBase;

    _baseChar = chosen.Literal[0];
    CharType = consonantsOnly ? IPASymbolType.consonant : IPASymbolType.vowel;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Parses the specified phonetic string into an array of phones.
/// </summary>
/// <param name="phonetic">The phonetic transcription to parse. When null or empty,
/// the method returns null.</param>
/// <param name="normalize">When true, the string is first passed through
/// FFNormalizer.Normalize.</param>
/// <param name="convertExperimentalTranscriptions">When true, the string is passed
/// through _transcriptionChanges.Convert before parsing.</param>
/// <param name="uncertainPhones">Set to null on entry. It is handed by reference to
/// GetUncertainties whenever a '(' is encountered; presumably that method fills it
/// with the uncertain phone groups keyed by phone index (verify against
/// GetUncertainties).</param>
/// <returns>The phones found in the string, or null when the string is null or
/// empty.</returns>
/// ------------------------------------------------------------------------------------
public string[] Parse(string phonetic, bool normalize, bool convertExperimentalTranscriptions, out Dictionary <int, string[]> uncertainPhones)
{
    uncertainPhones = null;

    // Return null (not an empty array) if there's nothing in the phonetic.
    if (string.IsNullOrEmpty(phonetic))
    {
        return(null);
    }

    var phones = new List <string>(phonetic.Length);
    IPASymbol ciPrev = null;

    // Normalize the string if necessary.
    if (normalize)
    {
        phonetic = FFNormalizer.Normalize(phonetic);
    }

    // Remember the run as it looked after normalization but before any conversion
    // or ambiguous sequence marking. This is the text that gets reported alongside
    // undefined characters and that is handed to GetUncertainties.
    var origPhoneticRunBeingParsed = phonetic;

    if (convertExperimentalTranscriptions)
    {
        phonetic = _transcriptionChanges.Convert(phonetic);
    }

    phonetic = MarkAmbiguousSequences(phonetic);

    // phoneStart is the index at which the phone currently being accumulated begins.
    int phoneStart = 0;
    bool hasBaseChar = false;

    for (int i = 0; i < phonetic.Length; i++)
    {
        char c = phonetic[i];
        char badChar = '\0';

        // Check if we've run into a marker indicating
        // the beginning of an ambiguous sequence.
        if (c == kParseTokenMarker)
        {
            // First, close the previous phone if there is one.
            if (i > phoneStart)
            {
                phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
            }

            // The character immediately after the marker is the token used to look
            // up the ambiguous sequence. Note that i is advanced past the token here.
            var ambigPhone = _sortedAmbiguousSeqList.GetAmbigSeqForToken(phonetic[++i]);
            if (!string.IsNullOrEmpty(ambigPhone))
            {
                phones.Add(ambigPhone);
            }

            phoneStart = i + 1;
            continue;
        }

        // Get the information for the current codepoint.
        var ciCurr = App.IPASymbolCache[c];

        // If there's no information for the code point, or its type is
        // notApplicable, then the character ends the current phone and is
        // either the start of an uncertain phone group or is treated as
        // its own phone.
        if (ciCurr == null || ciCurr.Type == IPASymbolType.notApplicable)
        {
            if (i > phoneStart)
            {
                phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
            }

            // Check if we're at the beginning of an uncertain phone group
            if (c != '(')
            {
                phoneStart = i + 1;
                badChar = c;
            }
            else
            {
                int index = i + 1;
                var primaryPhone = GetUncertainties(phonetic, ref index, phones.Count, ref uncertainPhones, origPhoneticRunBeingParsed);

                // Primary phone should only be null when no slash was found
                // between the parentheses. In that situation, the parentheses are
                // not considered to be surrounding a group of uncertain phones.
                if (primaryPhone == null)
                {
                    badChar = c;
                }
                else
                {
                    // Skip ahead to the index that GetUncertainties reported back.
                    phones.Add(primaryPhone);
                    i = index;
                }

                phoneStart = i + 1;
            }

            ciPrev = null;

            // badChar is only set when the character was not consumed as part of an
            // uncertain phone group. In that case the character becomes its own phone.
            if (badChar != '\0')
            {
                // Log the undefined character.
                if (LogUndefinedCharactersWhenParsing && App.IPASymbolCache.UndefinedCharacters != null)
                {
                    App.IPASymbolCache.UndefinedCharacters.Add(c, origPhoneticRunBeingParsed);
                }

                phones.Add(c.ToString());
            }

            continue;
        }

        // If we've encountered a non base character but nothing precedes it,
        // then it must be a diacritic at the beginning of the phonetic
        // transcription so just put it with the following characters.
        if (ciPrev == null && !ciCurr.IsBase)
        {
            continue;
        }

        // Is the previous codepoint special in that it's not a base character
        // but a base character must follow it in the same phone (e.g. a tie bar)?
        // If yes, then keep the current codepoint in the phone being accumulated
        // instead of letting it start a new phone.
        // NOTE(review): the code does not check that the current codepoint is
        // actually a base character, and hasBaseChar is not updated on this path.
        if (ciPrev != null && (!hasBaseChar || ciPrev.CombinesBaseCharacters) && ciPrev.CanPrecedeBase)
        {
            ciPrev = ciCurr;
            continue;
        }

        // At this point, if the current codepoint is a base character and
        // it's not the first in the string, close the previous phone. If
        // ciCurr.IsBase && i > phoneStart but ciPrev == null then it means
        // we've run across some non base characters at the beginning of the
        // transcription that aren't attached to a base character. Therefore,
        // attach them to the first base character that's found. In that case,
        // we don't want to add the phone to the collection yet. We'll wait
        // until we come across the beginning of the next phone.
        if (ciCurr.IsBase && i > phoneStart && ciPrev != null)
        {
            phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
            phoneStart = i;
            hasBaseChar = false;
        }

        ciPrev = ciCurr;
        hasBaseChar |= ciCurr.IsBase;
    }

    // Save the last phone
    if (phoneStart < phonetic.Length)
    {
        phones.Add(phonetic.Substring(phoneStart));
    }

    return(phones.ToArray());
}