/// ------------------------------------------------------------------------------------
        /// <summary>
        /// Creates an ambiguous sequence for the specified unit and chooses the unit's base
        /// character. The base character is the literal of the base symbol closest to the
        /// end of the unit or, when the unit contains no base symbol, the character closest
        /// to the end that has no entry in the IPA symbol cache. When neither is found (or
        /// the unit is null or empty), the base character is left unassigned.
        /// </summary>
        /// ------------------------------------------------------------------------------------
        public AmbiguousSeq(string unit)
        {
            Literal = unit;

            if (string.IsNullOrEmpty(unit))
            {
                return;
            }

            // First pass: scan backward for a symbol that is flagged as a base character.
            int pos = unit.Length;
            while (--pos >= 0)
            {
                IPASymbol symbol = App.IPASymbolCache[unit[pos]];
                if (symbol == null || !symbol.IsBase)
                {
                    continue;
                }

                BaseChar = symbol.Literal;
                return;
            }

            // Second pass: no base symbol exists in the unit, so scan backward again for
            // a character that isn't in the phonetic character inventory at all and use
            // the first such character encountered as the base.
            pos = unit.Length;
            while (--pos >= 0)
            {
                if (App.IPASymbolCache[unit[pos]] != null)
                {
                    continue;
                }

                BaseChar = unit[pos].ToString();
                return;
            }
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Works out the base character and character type for the specified phone from
        /// the base symbols it contains. Nothing is done when CheckIfAmbiguous returns
        /// true for the phone.
        /// </summary>
        /// ------------------------------------------------------------------------------------
        private void InitializeBaseChar(string phone)
        {
            if (CheckIfAmbiguous(phone))
            {
                return;
            }

            IPASymbol firstBase = null;
            IPASymbol lastBase  = null;

            // Track which kinds of base symbols were seen. Base symbols that are
            // neither consonants nor vowels are remembered as first/last but don't
            // influence these two flags.
            bool sawConsonant = false;
            bool sawVowel     = false;

            foreach (char ch in phone)
            {
                var symbol = App.IPASymbolCache[ch];
                if (symbol == null || !symbol.IsBase)
                {
                    continue;
                }

                if (symbol.Type == IPASymbolType.consonant)
                {
                    sawConsonant = true;
                }
                else if (symbol.Type == IPASymbolType.vowel)
                {
                    sawVowel = true;
                }

                if (firstBase == null)
                {
                    firstBase = symbol;
                }

                lastBase = symbol;
            }

            if (!sawConsonant && !sawVowel)
            {
                // No consonant or vowel base symbols. If some other kind of base symbol
                // was found and no character type has been assigned yet, adopt its type.
                if (firstBase != null && CharType == IPASymbolType.notApplicable)
                {
                    CharType = firstBase.Type;
                }

                return;
            }

            if (sawVowel)
            {
                // The consonant/vowel base symbols are not all consonants,
                // so the first base symbol supplies the base character.
                _baseChar = firstBase.Literal[0];
                CharType  = IPASymbolType.vowel;
            }
            else
            {
                // Every consonant/vowel base symbol is a consonant,
                // so the last base symbol supplies the base character.
                _baseChar = lastBase.Literal[0];
                CharType  = IPASymbolType.consonant;
            }
        }
// Example #3
// 0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Parses the specified phonetic string into an array of phones.
        /// </summary>
        /// <param name="phonetic">The phonetic transcription to parse.</param>
        /// <param name="normalize">When true, the transcription is passed through
        /// FFNormalizer.Normalize before it is parsed.</param>
        /// <param name="convertExperimentalTranscriptions">When true, the transcription is
        /// passed through the transcription changes converter before it is parsed.</param>
        /// <param name="uncertainPhones">Set to null on entry and handed by reference to
        /// GetUncertainties for every opening parenthesis encountered. Presumably keyed by
        /// phone index with the candidate phones as values -- confirm against
        /// GetUncertainties.</param>
        /// <returns>The phones found in the transcription, or null when
        /// <paramref name="phonetic"/> is null or empty.</returns>
        /// ------------------------------------------------------------------------------------
        public string[] Parse(string phonetic, bool normalize,
                              bool convertExperimentalTranscriptions, out Dictionary<int, string[]> uncertainPhones)
        {
            uncertainPhones = null;

            // Return null (not an empty array) if there's nothing in the phonetic.
            if (string.IsNullOrEmpty(phonetic))
            {
                return null;
            }

            var       phones = new List<string>(phonetic.Length);
            IPASymbol ciPrev = null;

            // Normalize the string if necessary.
            if (normalize)
            {
                phonetic = FFNormalizer.Normalize(phonetic);
            }

            // Remember the run as it looked before experimental transcriptions were
            // converted and ambiguous sequences were marked. This is the text that gets
            // passed along when reporting undefined characters and uncertainties.
            var origPhoneticRunBeingParsed = phonetic;

            if (convertExperimentalTranscriptions)
            {
                phonetic = _transcriptionChanges.Convert(phonetic);
            }

            phonetic = MarkAmbiguousSequences(phonetic);

            int  phoneStart  = 0;
            bool hasBaseChar = false;

            for (int i = 0; i < phonetic.Length; i++)
            {
                char c       = phonetic[i];
                char badChar = '\0';

                // Check if we've run into a marker indicating the beginning of an
                // ambiguous sequence. The marker is only honored when a token character
                // follows it; a marker that is the very last character has no token to
                // read (indexing past the end would throw), so it falls through and is
                // handled like any other character.
                if (c == kParseTokenMarker && i + 1 < phonetic.Length)
                {
                    // First, close the previous phone if there is one.
                    if (i > phoneStart)
                    {
                        phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
                    }

                    // Consume the token character that follows the marker.
                    var ambigPhone =
                        _sortedAmbiguousSeqList.GetAmbigSeqForToken(phonetic[++i]);

                    if (!string.IsNullOrEmpty(ambigPhone))
                    {
                        phones.Add(ambigPhone);
                    }

                    phoneStart = i + 1;
                    continue;
                }

                // Get the information for the current codepoint.
                var ciCurr = App.IPASymbolCache[c];

                // If there's no information for the code point, or its type is
                // notApplicable, then close the phone in progress and treat the
                // character as its own phone (unless it opens an uncertain phone group).
                if (ciCurr == null || ciCurr.Type == IPASymbolType.notApplicable)
                {
                    if (i > phoneStart)
                    {
                        phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
                    }

                    // Check if we're at the beginning of an uncertain phone group
                    if (c != '(')
                    {
                        phoneStart = i + 1;
                        badChar    = c;
                    }
                    else
                    {
                        int index        = i + 1;
                        var primaryPhone = GetUncertainties(phonetic, ref index,
                                                            phones.Count, ref uncertainPhones, origPhoneticRunBeingParsed);

                        // Primary phone should only be null when no slash was found
                        // between the parentheses. In that situation, the parentheses are
                        // not considered to be surrounding a group of uncertain phones.
                        if (primaryPhone == null)
                        {
                            badChar = c;
                        }
                        else
                        {
                            // Resume parsing from the position GetUncertainties left
                            // in index.
                            phones.Add(primaryPhone);
                            i = index;
                        }

                        phoneStart = i + 1;
                    }

                    ciPrev = null;

                    if (badChar != '\0')
                    {
                        // Log the undefined character.
                        if (LogUndefinedCharactersWhenParsing && App.IPASymbolCache.UndefinedCharacters != null)
                        {
                            App.IPASymbolCache.UndefinedCharacters.Add(c, origPhoneticRunBeingParsed);
                        }

                        phones.Add(c.ToString());
                    }

                    continue;
                }

                // If we've encountered a non base character but nothing precedes it,
                // then it must be a diacritic at the beginning of the phonetic
                // transcription so just put it with the following characters.
                if (ciPrev == null && !ciCurr.IsBase)
                {
                    continue;
                }

                // Is the previous codepoint special in that it's not a base character
                // but a base character must follow it in the same phone (e.g. a tie bar)?
                // If yes, then keep the current codepoint in the same phone rather than
                // letting it start a new one.
                if (ciPrev != null && (!hasBaseChar || ciPrev.CombinesBaseCharacters) && ciPrev.CanPrecedeBase)
                {
                    ciPrev = ciCurr;
                    continue;
                }

                // At this point, if the current codepoint is a base character and
                // it's not the first in the string, close the previous phone. If
                // ciCurr.IsBase && i > phoneStart but ciPrev == null then it means
                // we've run across some non base characters at the beginning of the
                // transcription that aren't attached to a base character. Therefore,
                // attach them to the first base character that's found. In that case,
                // we don't want to add the phone to the collection yet. We'll wait
                // until we come across the beginning of the next phone.
                if (ciCurr.IsBase && i > phoneStart && ciPrev != null)
                {
                    phones.Add(phonetic.Substring(phoneStart, i - phoneStart));
                    phoneStart  = i;
                    hasBaseChar = false;
                }

                ciPrev       = ciCurr;
                hasBaseChar |= ciCurr.IsBase;
            }

            // Save the last phone
            if (phoneStart < phonetic.Length)
            {
                phones.Add(phonetic.Substring(phoneStart));
            }

            return phones.ToArray();
        }