/// ------------------------------------------------------------------------------------
/// <summary>
/// Counts the base character at the given index together with the unbroken run of
/// diacritics (as reported by the categorizer) that immediately follows it.
/// </summary>
/// <param name="tok">The text token whose text is examined.</param>
/// <param name="iBaseCharacter">The index of the base character in the token's
/// text.</param>
/// <returns>1 for the base character plus the number of consecutive diacritics that
/// directly follow it</returns>
/// ------------------------------------------------------------------------------------
private int GetLengthOfChar(ITextToken tok, int iBaseCharacter)
{
    string tokenText = tok.Text;

    // The base character itself always counts, whatever follows it.
    int length = 1;

    for (int i = iBaseCharacter + 1; i < tokenText.Length; i++)
    {
        // The run ends at the first character that is not a diacritic.
        if (!m_categorizer.IsDiacritic(tokenText[i]))
        {
            break;
        }

        length++;
    }

    return length;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Lazily splits a string into character sequences. When the categorizer reports that
/// diacritics follow base characters, each diacritic is appended to the sequence
/// currently being built; in every other case a character starts a new sequence and
/// the previous one (if any) is yielded.
/// </summary>
/// <param name="text">The text to split.</param>
/// <returns>The non-empty character sequences, in the order they occur in the
/// text</returns>
/// ------------------------------------------------------------------------------------
public IEnumerable<string> ParseCharacterSequences(string text)
{
    bool diacriticsTrailBase = m_categorizer.DiacriticsFollowBaseCharacters();
    string pending = string.Empty;

    foreach (char ch in text)
    {
        // IsDiacritic is evaluated first so it is consulted for every character.
        if (m_categorizer.IsDiacritic(ch) && diacriticsTrailBase)
        {
            pending += ch;
            continue;
        }

        // NOTE(review): when diacritics do not follow base characters, a diacritic is
        // emitted as its own sequence rather than being joined to the next base
        // character -- confirm this is the intended behavior.
        if (pending.Length > 0)
        {
            yield return pending;
        }

        pending = ch.ToString();
    }

    // Flush whatever sequence was still being built when the text ended.
    if (pending.Length > 0)
    {
        yield return pending;
    }
}