/// <summary>
/// Helper method that an app could create to find every id contained in a string.
/// An id starts at a code point accepted by UnicodeCharacters.IsIdStart and extends
/// over the following code points accepted by UnicodeCharacters.IsIdContinue.
/// </summary>
/// <param name="inputString">String that contains one or more ids</param>
/// <returns>List of individual ids found in the input string</returns>
/// <exception cref="ArgumentException">
/// A high surrogate is the last character of the string, or is not followed by a
/// low surrogate.
/// </exception>
private List<String> FindIdsInString(String inputString)
{
    // List where we maintain the ids found in the input string
    List<String> idList = new List<String>();

    // Index of the first UTF-16 code unit of the id being collected, or -1 when
    // we are not currently inside an id
    int indexIdStart = -1;

    // Walk the string one code point at a time. "i" always addresses the FIRST
    // code unit of the current code point, so it can be used directly both as an
    // id start and as the (exclusive) end of a completed id.
    int i = 0;
    while (i < inputString.Length)
    {
        uint codepoint = inputString[i];
        int codeUnitCount = 1;

        if (UnicodeCharacters.IsHighSurrogate(codepoint))
        {
            // A high surrogate must be matched by a low surrogate.
            if (i + 1 >= inputString.Length)
            {
                throw new ArgumentException("Bad trailing surrogate at end of string");
            }

            if (!UnicodeCharacters.IsLowSurrogate(inputString[i + 1]))
            {
                throw new ArgumentException("High surrogate not followed by a low surrogate");
            }

            // Replace the high surrogate with the full supplementary code point.
            codepoint = UnicodeCharacters.GetCodepointFromSurrogatePair(codepoint, inputString[i + 1]);
            codeUnitCount = 2;
        }

        if ((indexIdStart != -1) && !UnicodeCharacters.IsIdContinue(codepoint))
        {
            // The current code point does not continue the id, so the id is complete.
            // It ends just before the first code unit of the current code point, which
            // keeps a terminating surrogate pair entirely out of the id.
            idList.Add(inputString.Substring(indexIdStart, i - indexIdStart));
            indexIdStart = -1;
        }

        // Not inside an id (possibly because one was just closed above): the current
        // code point is examined as a potential id start.
        if ((indexIdStart == -1) && UnicodeCharacters.IsIdStart(codepoint))
        {
            indexIdStart = i;
        }

        i += codeUnitCount;
    }

    // Do we have a pending id at the end of the string?
    if (indexIdStart != -1)
    {
        idList.Add(inputString.Substring(indexIdStart));
    }

    // Return the list of identifiers found in the string
    return idList;
}
/// <summary>
/// Reports whether the general category that UnicodeCharacters.GetGeneralCategory
/// returns for a code point is Control, SpaceSeparator, LineSeparator or
/// ParagraphSeparator.
/// </summary>
/// <param name="c">Code point to classify</param>
/// <returns>True when the category is one of the four listed above; otherwise false</returns>
public static bool IsWhiteSpaceOrControl(uint c)
{
    switch (UnicodeCharacters.GetGeneralCategory(c))
    {
        case UnicodeGeneralCategory.Control:
        case UnicodeGeneralCategory.SpaceSeparator:
        case UnicodeGeneralCategory.LineSeparator:
        case UnicodeGeneralCategory.ParagraphSeparator:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Reports whether the general category that UnicodeCharacters.GetGeneralCategory
/// returns for a code point equals the requested category.
/// </summary>
/// <param name="c">Code point to classify</param>
/// <param name="cat">Category to compare against</param>
/// <returns>True when the code point's general category equals <paramref name="cat"/></returns>
public static bool IsInCategory(uint c, UnicodeGeneralCategory cat)
{
    UnicodeGeneralCategory actualCategory = UnicodeCharacters.GetGeneralCategory(c);
    return actualCategory == cat;
}
/// <summary>
/// Helper method that an app could create to find every id contained in a string.
/// An id begins with a code point for which UnicodeCharacters.IsIdStart is true and
/// continues with code points for which UnicodeCharacters.IsIdContinue is true.
/// A high surrogate that is not followed by a low surrogate does not raise an error;
/// it is evaluated as code point 0 instead.
/// </summary>
/// <param name="inputString">String from which to extract ids</param>
/// <returns>List of individual ids found in the input string</returns>
private List<String> FindIdsInString(String inputString)
{
    // List where we maintain the ids found in the input string
    List<String> idList = new List<String>();

    // Index of the first UTF-16 code unit of the id being collected, or -1 when
    // we are not currently inside an id
    int indexIdStart = -1;

    // Iterate through the string one code point at a time; "i" is always the index
    // of the first code unit of the current code point.
    int i = 0;
    while (i < inputString.Length)
    {
        int nextIndex;
        uint codepoint = inputString[i];

        if (UnicodeCharacters.IsHighSurrogate(codepoint))
        {
            // A high surrogate must be followed by a low surrogate. The bounds test has
            // to look at i + 1 (not i): otherwise a string that ends with a high
            // surrogate would be indexed one past its last character.
            if ((i + 1 < inputString.Length) && UnicodeCharacters.IsLowSurrogate(inputString[i + 1]))
            {
                // Update the code point with the surrogate pair.
                codepoint = UnicodeCharacters.GetCodepointFromSurrogatePair(codepoint, inputString[i + 1]);
                nextIndex = i + 2;
            }
            else
            {
                // Warning: high surrogate not followed by a low surrogate. Classify it
                // as code point 0 and step over just this one code unit.
                // NOTE(review): presumably neither IsIdStart nor IsIdContinue accepts
                // code point 0, so this ends any id in progress; confirm against the API.
                codepoint = 0;
                nextIndex = i + 1;
            }
        }
        else
        {
            // Not a surrogate pair.
            nextIndex = i + 1;
        }

        if (indexIdStart == -1)
        {
            // Not in an id. Have we found an id start?
            if (UnicodeCharacters.IsIdStart(codepoint))
            {
                indexIdStart = i;
            }
        }
        else if (!UnicodeCharacters.IsIdContinue(codepoint))
        {
            // We have not found an id continue, so the id is complete. We need to
            // create the identifier string.
            idList.Add(inputString.Substring(indexIdStart, i - indexIdStart));

            // Reset the id start and stay on the current code point so that it is
            // re-examined as a possible id start in the next iteration.
            indexIdStart = -1;
            nextIndex = i;
        }

        i = nextIndex;
    }

    // Do we have a pending id at the end of the string?
    if (indexIdStart != -1)
    {
        // The loop has ended, so "i" is the string length and the id runs to the end.
        idList.Add(inputString.Substring(indexIdStart, i - indexIdStart));
    }

    // Return the list of identifiers found in the string
    return idList;
}