/// <summary>
/// Tests whether a UTF-16 surrogate pair falls inside the supplementary range accepted by the nameStartChar production of the Turtle specification.
/// </summary>
/// <param name="c">High surrogate.</param>
/// <param name="d">Low surrogate.</param>
/// <returns>
/// True when <paramref name="c"/> and <paramref name="d"/> are a high/low surrogate pair and the code point obtained
/// from <c>UnicodeSpecsHelper.ConvertToUtf32</c> lies between 0x10000 and 0xEFFFF inclusive; false otherwise.
/// </returns>
public static bool IsNameStartChar(char c, char d)
{
    // Reject anything that is not a high surrogate followed by a low surrogate
    bool isSurrogatePair = UnicodeSpecsHelper.IsHighSurrogate(c) && UnicodeSpecsHelper.IsLowSurrogate(d);
    if (!isSurrogatePair)
    {
        return false;
    }

    // Combine the pair into one code point and range check it
    int combined = UnicodeSpecsHelper.ConvertToUtf32(c, d);
    return 0x10000 <= combined && combined <= 0xeffff;
}
/// <summary>
/// Determines whether a string is acceptable as the local name part of a Turtle prefixed name.
/// </summary>
/// <param name="value">Candidate local name.</param>
/// <param name="syntax">Turtle syntax to validate against; <c>TurtleSyntax.W3C</c> selects the PN_LOCAL based rules, any other value selects the original name rules.</param>
/// <returns>True if the value is empty or satisfies the rules of the selected syntax, false otherwise.</returns>
public static bool IsValidLocalName(String value, TurtleSyntax syntax)
{
    char[] chars = value.ToCharArray();

    // The empty local name is accepted regardless of syntax
    if (chars.Length == 0)
    {
        return true;
    }

    if (syntax == TurtleSyntax.W3C)
    {
        // PNAME_LN ::= PNAME_NS PN_LOCAL
        // PNAME_NS ::= PN_PREFIX? ':'
        // Treat the value as a run of ':' separated segments
        String[] segments = value.Split(':');
        int lastIndex = segments.Length - 1;

        // Walk the non-final segments; each must be a valid PN_PREFIX unless it is empty or
        // it clearly belongs to the local name (contains an escape starter or begins with a digit)
        int index = 0;
        while (index < lastIndex)
        {
            String segment = segments[index];
            if (segment.Length > 0)
            {
                bool startsLocalName = segment.Contains("%") || segment.Contains("\\") || Char.IsDigit(segment[0]);
                if (startsLocalName)
                {
                    break;
                }

                if (!IsPNPrefix(segment, syntax))
                {
                    return false;
                }
            }

            index++;
        }

        // If we stopped early the local name is everything from that segment onwards, with the
        // ':' separators restored; otherwise it is just the final segment
        String local = index < lastIndex
            ? String.Join(":", segments, index, segments.Length - index)
            : segments[lastIndex];

        // An empty remainder is fine (the value ended with ':'), otherwise it must be a PN_LOCAL
        return local.Length == 0 || IsPNLocal(local);
    }

    // name ::= nameStartChar nameChar*
    // The first character (or surrogate pair) has to be a nameStartChar
    int position = 1;
    if (!IsNameStartChar(chars[0]))
    {
        bool startsWithValidPair = UnicodeSpecsHelper.IsHighSurrogate(chars[0])
                                   && chars.Length > 1
                                   && IsNameStartChar(chars[0], chars[1]);
        if (!startsWithValidPair)
        {
            return false;
        }

        position = 2;
    }

    // Everything after that has to be a nameChar, again allowing for surrogate pairs
    while (position < chars.Length)
    {
        if (IsNameChar(chars[position]))
        {
            position++;
            continue;
        }

        bool validPair = UnicodeSpecsHelper.IsHighSurrogate(chars[position])
                         && position < chars.Length - 1
                         && IsNameChar(chars[position], chars[position + 1]);
        if (!validPair)
        {
            return false;
        }

        // Step over both halves of the pair
        position += 2;
    }

    return true;
}
/// <summary>
/// Gets whether the given value matches the PN_LOCAL rule from the Turtle specification.
/// </summary>
/// <param name="value">Value.</param>
/// <returns>
/// True if the value matches PN_LOCAL, false otherwise. An empty string returns false because the
/// production requires at least one character.
/// </returns>
public static bool IsPNLocal(String value)
{
    // PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?

    char[] cs = value.ToCharArray();

    // PN_LOCAL needs at least one character; without this guard the cs[0] access below
    // would throw an IndexOutOfRangeException for an empty string
    if (cs.Length == 0)
    {
        return false;
    }

    int start = 1, temp = 0;

    // Validate first character
    if (cs[0] != ':' && !Char.IsDigit(cs[0]) && !IsPLX(cs, 0, out temp) && !IsPNCharsU(cs[0]))
    {
        // Handle surrogate pairs for UTF-32 characters
        if (UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1)
        {
            if (!IsPNCharsU(cs[0], cs[1]))
            {
                return false;
            }
            // The pair occupies two chars so scanning resumes after the low surrogate
            start++;
        }
        else
        {
            return false;
        }
    }

    // We may have seen a PLX as the first thing so need to correct start appropriately
    // NOTE(review): this relies on IsPLX reporting the index of the last char of the escape via its out parameter - confirm against IsPLX
    if (temp > 0)
    {
        start = temp + 1;
    }

    // Nothing left after the first item means the value is valid
    if (start >= cs.Length)
    {
        return true;
    }

    // Intermediate characters can be PN_CHARS, a '.', a ':' or a PLX
    // The final char is deliberately excluded here as it is checked separately below
    for (int i = start; i < cs.Length - 1; i++)
    {
        int j = i;
        if (cs[i] != '.' && cs[i] != ':' && !IsPNChars(cs[i]) && !IsPLX(cs, i, out j))
        {
            // Handle surrogate pairs for UTF-32 characters
            if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i < cs.Length - 2)
            {
                if (!IsPNChars(cs[i], cs[i + 1]))
                {
                    return false;
                }
                // Skip the low surrogate and keep j in step so this is not mistaken for a PLX below
                i++;
                j = i;
            }
            else if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i == cs.Length - 2)
            {
                // This case handles the case where the final character is a UTF-32 character represented by a surrogate pair
                return IsPNChars(cs[i], cs[i + 1]);
            }
            else
            {
                return false;
            }
        }

        if (i != j)
        {
            // This means we just saw a PLX
            // Last thing being a PLX is valid
            if (j == cs.Length - 1)
            {
                return true;
            }

            // Otherwise adjust the index appropriately and continue checking further characters
            i = j;
        }
    }

    // Final character is a ':' or a PN_CHARS (a trailing '.' is therefore rejected)
    return cs[cs.Length - 1] == ':' || IsPNChars(cs[cs.Length - 1]);
}
/// <summary>
/// Determines whether a string is acceptable as the prefix part of a Turtle prefixed name.
/// </summary>
/// <param name="value">Candidate prefix.</param>
/// <param name="syntax">Turtle syntax to validate against; <c>TurtleSyntax.W3C</c> selects the PN_PREFIX rules, any other value selects the original prefixName rules.</param>
/// <returns>True if the value is empty or satisfies the rules of the selected syntax, false otherwise.</returns>
public static bool IsPNPrefix(String value, TurtleSyntax syntax)
{
    char[] chars = value.ToCharArray();

    // The empty prefix is accepted under either syntax
    if (chars.Length == 0)
    {
        return true;
    }

    int position;

    if (syntax == TurtleSyntax.W3C)
    {
        // PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
        int lastIndex = chars.Length - 1;

        // The first character, or the surrogate pair it begins, must be in PN_CHARS_BASE
        if (IsPNCharsBase(chars[0]))
        {
            position = 1;
        }
        else if (UnicodeSpecsHelper.IsHighSurrogate(chars[0]) && chars.Length > 1 && IsPNCharsBase(chars[0], chars[1]))
        {
            position = 2;
        }
        else
        {
            return false;
        }

        // Nothing beyond the first character means we are done
        if (position == chars.Length)
        {
            return true;
        }

        // Everything up to, but not including, the final char may be a '.' or a PN_CHARS
        while (position < lastIndex)
        {
            char current = chars[position];
            if (current == '.' || IsPNChars(current))
            {
                position++;
                continue;
            }

            // The only remaining possibility is the start of a surrogate pair
            if (!UnicodeSpecsHelper.IsHighSurrogate(current))
            {
                return false;
            }

            if (position == lastIndex - 1)
            {
                // The pair makes up the final character so its validity decides the result
                return IsPNChars(current, chars[position + 1]);
            }

            if (!IsPNChars(current, chars[position + 1]))
            {
                return false;
            }

            // Step over both halves of the pair
            position += 2;
        }

        // The final character must be a PN_CHARS, so a trailing '.' is rejected
        return IsPNChars(chars[lastIndex]);
    }

    // prefixName ::= ( nameStartChar - '_' ) nameChar*
    // The first character must be a nameStartChar other than '_', or begin a valid surrogate pair
    if (IsNameStartChar(chars[0]) && chars[0] != '_')
    {
        position = 1;
    }
    else if (UnicodeSpecsHelper.IsHighSurrogate(chars[0]) && chars.Length > 1 && IsNameStartChar(chars[0], chars[1]))
    {
        position = 2;
    }
    else
    {
        return false;
    }

    // All remaining characters must be nameChar, allowing for surrogate pairs
    while (position < chars.Length)
    {
        if (IsNameChar(chars[position]))
        {
            position++;
            continue;
        }

        bool validPair = UnicodeSpecsHelper.IsHighSurrogate(chars[position])
                         && position < chars.Length - 1
                         && IsNameChar(chars[position], chars[position + 1]);
        if (!validPair)
        {
            return false;
        }

        position += 2;
    }

    return true;
}