/// <summary>
/// Determines whether a UTF-16 surrogate pair encodes a code point allowed by the nameStartChar production of the Turtle specification.
/// </summary>
/// <param name="c">High surrogate.</param>
/// <param name="d">Low surrogate.</param>
/// <returns>True if the characters form a high/low surrogate pair whose code point lies between U+10000 and U+EFFFF inclusive, false otherwise.</returns>
public static bool IsNameStartChar(char c, char d)
{
    // Anything that is not a well-formed high/low pair cannot match
    if (!UnicodeSpecsHelper.IsHighSurrogate(c) || !UnicodeSpecsHelper.IsLowSurrogate(d))
    {
        return false;
    }

    // Combine the pair into a single code point and range check it
    int utf32 = UnicodeSpecsHelper.ConvertToUtf32(c, d);
    return 0x10000 <= utf32 && utf32 <= 0xeffff;
}
/// <summary>
/// Tests a single character against the ucschar production.
/// </summary>
/// <param name="c">Character to test.</param>
/// <returns>The result of <c>UnicodeSpecsHelper.IsLetterOrDigit</c> for the given character.</returns>
/// <remarks>
/// The official ucschar production permits character codes beyond the range of the .Net char type, so not every string that matches that production will be matched by this function.
/// </remarks>
public static bool IsUcsChar(char c)
{
    bool isLetterOrDigit = UnicodeSpecsHelper.IsLetterOrDigit(c);
    return isLetterOrDigit;
}
/// <summary>
/// Gets whether the given value is the valid local name portion of a prefixed name in Turtle.
/// </summary>
/// <param name="value">Value.</param>
/// <param name="syntax">Turtle Syntax.</param>
/// <returns>
/// True if the value is empty, or if it matches the local name grammar for the given syntax
/// (PN_LOCAL for <see cref="TurtleSyntax.W3C"/>, <c>name ::= nameStartChar nameChar*</c> otherwise); false otherwise.
/// </returns>
public static bool IsValidLocalName(String value, TurtleSyntax syntax)
{
    // Empty local names are valid
    if (value.Length == 0)
    {
        return true;
    }

    switch (syntax)
    {
        case TurtleSyntax.W3C:
            // PNAME_LN ::= PNAME_NS PN_LOCAL
            // The local name portion is exactly the PN_LOCAL production, and PN_LOCAL itself permits ':'
            // in any position. Splitting on ':' and validating the leading segments as PN_PREFIX wrongly
            // rejected valid local names such as "_a:b", "a.:b" and "a:.b", so validate the whole value
            // against PN_LOCAL instead.
            return IsPNLocal(value);

        default:
            // name ::= nameStartChar nameChar*
            char[] cs = value.ToCharArray();
            int start = 1;

            // Validate first character is a nameStartChar
            if (!IsNameStartChar(cs[0]))
            {
                // It may instead be the high half of a surrogate pair encoding a UTF-32 character
                if (UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1)
                {
                    if (!IsNameStartChar(cs[0], cs[1]))
                    {
                        return false;
                    }
                    // The pair occupies two chars so further validation starts one position later
                    start++;
                }
                else
                {
                    return false;
                }
            }

            // Further characters must be nameChar
            for (int i = start; i < cs.Length; i++)
            {
                if (IsNameChar(cs[i]))
                {
                    continue;
                }

                // Not a nameChar on its own, only acceptable as the start of a valid surrogate pair
                if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i < cs.Length - 1)
                {
                    if (!IsNameChar(cs[i], cs[i + 1]))
                    {
                        return false;
                    }
                    // Skip the low surrogate we just consumed
                    i++;
                }
                else
                {
                    return false;
                }
            }
            return true;
    }
}
/// <summary>
/// Gets whether the given value matches the PN_LOCAL rule from the Turtle specification.
/// </summary>
/// <param name="value">Value.</param>
/// <returns>True if the value matches PN_LOCAL, false otherwise. A null or empty value never matches because the rule requires at least one character.</returns>
public static bool IsPNLocal(String value)
{
    // PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?

    // The production requires at least one character; without this guard the cs[0] access below
    // would throw for an empty value
    if (String.IsNullOrEmpty(value))
    {
        return false;
    }

    char[] cs = value.ToCharArray();
    int start = 1, temp = 0;

    // Validate first character
    // Note temp is only written when IsPLX is actually evaluated (the && chain short-circuits)
    if (cs[0] != ':' && !Char.IsDigit(cs[0]) && !IsPLX(cs, 0, out temp) && !IsPNCharsU(cs[0]))
    {
        // Handle surrogate pairs for UTF-32 characters
        if (UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1)
        {
            if (!IsPNCharsU(cs[0], cs[1]))
            {
                return false;
            }
            start++;
        }
        else
        {
            return false;
        }
    }

    // We may have seen a PLX as the first thing so need to correct start appropriately
    // (presumably IsPLX reports the index of the last character of the escape - it is defined elsewhere)
    if (temp > 0)
    {
        start = temp + 1;
    }
    if (start >= cs.Length)
    {
        return true;
    }

    // Intermediate characters can be PN_CHARS, a '.', a ':' or a PLX
    // The final character is excluded here because it is subject to a stricter check below
    for (int i = start; i < cs.Length - 1; i++)
    {
        int j = i;
        if (cs[i] != '.' && cs[i] != ':' && !IsPNChars(cs[i]) && !IsPLX(cs, i, out j))
        {
            // Handle surrogate pairs for UTF-32 characters
            if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i < cs.Length - 2)
            {
                if (!IsPNChars(cs[i], cs[i + 1]))
                {
                    return false;
                }
                // Skip the low surrogate and resynchronise j so the PLX check below is not triggered
                i++;
                j = i;
            }
            else if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i == cs.Length - 2)
            {
                // This case handles the case where the final character is a UTF-32 character represented by a surrogate pair
                return IsPNChars(cs[i], cs[i + 1]);
            }
            else
            {
                return false;
            }
        }

        if (i != j)
        {
            // This means we just saw a PLX
            // Last thing being a PLX is valid
            if (j == cs.Length - 1)
            {
                return true;
            }
            // Otherwise adjust the index appropriately and continue checking further characters
            i = j;
        }
    }

    // Final character is a ':' or a PN_CHARS
    return cs[cs.Length - 1] == ':' || IsPNChars(cs[cs.Length - 1]);
}
/// <summary>
/// Checks whether the given value may be used as the prefix portion of a prefixed name in Turtle.
/// </summary>
/// <param name="value">Value.</param>
/// <param name="syntax">Turtle Syntax.</param>
/// <returns>True if the value is empty or matches the prefix grammar of the given syntax, false otherwise.</returns>
public static bool IsPNPrefix(String value, TurtleSyntax syntax)
{
    char[] cs = value.ToCharArray();

    // An empty prefix is accepted regardless of syntax
    if (cs.Length == 0)
    {
        return true;
    }

    // Index of the first character that has not yet been validated
    int next = 1;

    switch (syntax)
    {
        case TurtleSyntax.W3C:
        {
            // PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?

            // Leading character (or leading surrogate pair) must be in PN_CHARS_BASE
            if (!IsPNCharsBase(cs[0]))
            {
                bool startsWithPair = UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1;
                if (!startsWithPair || !IsPNCharsBase(cs[0], cs[1]))
                {
                    return false;
                }
                next = 2;
            }

            // Nothing beyond the leading character
            if (next == cs.Length)
            {
                return true;
            }

            int last = cs.Length - 1;

            // Everything before the final position must be a '.' or in PN_CHARS
            for (int i = next; i < last; i++)
            {
                char current = cs[i];
                if (current == '.' || IsPNChars(current))
                {
                    continue;
                }

                // Only remaining possibility is a UTF-32 character encoded as a surrogate pair
                if (!UnicodeSpecsHelper.IsHighSurrogate(current))
                {
                    return false;
                }

                if (i == last - 1)
                {
                    // The pair occupies the final two positions so it decides the overall result
                    return IsPNChars(current, cs[i + 1]);
                }

                if (!IsPNChars(current, cs[i + 1]))
                {
                    return false;
                }

                // Step over the low surrogate
                i++;
            }

            // Closing character must be in PN_CHARS (a trailing '.' is not allowed)
            return IsPNChars(cs[last]);
        }

        default:
        {
            // prefixName ::= ( nameStartChar - '_' ) nameChar*

            // Leading character must be a nameStartChar other than '_', or a valid surrogate pair
            if (!IsNameStartChar(cs[0]) || cs[0] == '_')
            {
                bool startsWithPair = UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1;
                if (!startsWithPair || !IsNameStartChar(cs[0], cs[1]))
                {
                    return false;
                }
                next = 2;
            }

            // All remaining characters must be nameChar, again allowing for surrogate pairs
            int pos = next;
            while (pos < cs.Length)
            {
                if (IsNameChar(cs[pos]))
                {
                    pos++;
                    continue;
                }

                bool hasLowHalf = UnicodeSpecsHelper.IsHighSurrogate(cs[pos]) && pos < cs.Length - 1;
                if (!hasLowHalf || !IsNameChar(cs[pos], cs[pos + 1]))
                {
                    return false;
                }

                // Consumed both halves of the pair
                pos += 2;
            }
            return true;
        }
    }
}