/// <summary>Determines whether a character is acceptable for XML name tokens
/// conforming to the "Namespaces Constraints".</summary>
/// <remarks>Identical to <see cref="IsNmTokenType"/> except
/// that colons are not allowed.</remarks>
/// <param name="cs">Character to classify.</param>
/// <returns><c>true</c> if the character is acceptable, <c>false</c> otherwise.</returns>
/// <seealso href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCNameChar Production on w3.org.</seealso>
/// <seealso href="http://w3.org/TR/REC-xml-names/#Conformance">Namespaces Constraints on w3.org.</seealso>
public static bool IsNcNmTokenType(CharStruct cs)
{
    switch (GetCharType(cs))
    {
        case CharType.NMSTRT:
        case CharType.HEX:
        case CharType.DIGIT:
        case CharType.NAME:
        case CharType.MINUS:
            return true;

        case CharType.NONASCII:
            // NamePages maps the high byte to a bitmap page; a page spans 8 entries
            // of NamingBitmap (<< 3), the top three bits of the low byte select the
            // entry and the low five bits select the bit inside it.
            return (NamingBitmap[(NamePages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) != 0;

        case CharType.COLON: // the one difference from IsNmTokenType
        default:
            return false;
    }
}
/// <summary>Checks whether every character of a string is acceptable for an
/// XML name token conforming to the "Namespaces Constraints".</summary>
/// <param name="str">String to be checked.</param>
/// <returns><c>true</c> if <paramref name="str"/> is non-empty and all of its characters
/// pass <see cref="IsNcNmTokenType"/>; <c>false</c> otherwise, including when
/// <paramref name="str"/> is <c>null</c> or empty.</returns>
public static bool IsNcNmToken(string str)
{
    // A null reference is treated like an empty string (not a token) instead of
    // failing with a NullReferenceException further down.
    if (str == null || str.Length == 0)
    {
        return false;
    }

    foreach (char ch in str)
    {
        if (!IsNcNmTokenType(new CharStruct(ch)))
        {
            return false;
        }
    }

    return true;
}
/// <summary>Checks whether a range of a character array consists only of characters
/// acceptable for an XML name token conforming to the "Namespaces Constraints".</summary>
/// <param name="chars">Character array containing string to be checked.</param>
/// <param name="start">Start index of string.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if <paramref name="len"/> is positive and every character in the
/// range passes <see cref="IsNcNmTokenType"/>; <c>false</c> otherwise.</returns>
public static bool IsNcNmToken(char[] chars, int start, int len)
{
    // An empty range is never a valid token.
    if (len <= 0)
    {
        return false;
    }

    for (int pos = start, stop = start + len; pos < stop; pos++)
    {
        if (!IsNcNmTokenType(new CharStruct(chars[pos])))
        {
            return false;
        }
    }

    return true;
}
/// <summary>Checks whether a string is a valid XML name conforming to the
/// "Namespaces Constraints" (an NCName).</summary>
/// <param name="str">String to be checked.</param>
/// <returns><c>true</c> if the first character passes <see cref="IsNcNameStartType"/> and
/// all remaining characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise,
/// including when <paramref name="str"/> is <c>null</c> or empty.</returns>
public static bool IsNcName(string str)
{
    // A null reference is rejected like an empty string rather than raising a
    // NullReferenceException when the first character is read.
    if (str == null || str.Length == 0)
    {
        return false;
    }

    if (!IsNcNameStartType(new CharStruct(str[0])))
    {
        return false;
    }

    // A single valid start character is a complete name; otherwise the tail
    // must consist of name-token characters.
    return str.Length == 1 || IsNcNmToken(str, 1, str.Length - 1);
}
/// <summary>Determines whether a character is acceptable as an XML name start character.</summary>
/// <param name="cs">Character to classify.</param>
/// <returns><c>true</c> if the character may start a name, <c>false</c> otherwise.</returns>
/// <seealso href="http://www.w3.org/TR/REC-xml/#NT-Name">Name production on w3.org.</seealso>
public static bool IsNameStartType(CharStruct cs)
{
    switch (GetCharType(cs))
    {
        case CharType.NMSTRT:
        case CharType.HEX:
        case CharType.COLON:
            return true;

        case CharType.NONASCII:
            // NmStartPages maps the high byte to a bitmap page; a page spans 8 entries
            // of NamingBitmap (<< 3), the top three bits of the low byte select the
            // entry and the low five bits select the bit inside it.
            return (NamingBitmap[(NmStartPages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) != 0;

        default:
            return false;
    }
}
/// <summary>Checks whether a sub-string consists only of characters acceptable for an
/// XML name token conforming to the "Namespaces Constraints".</summary>
/// <remarks>If <paramref name="start"/> plus <paramref name="len"/> runs past the end of
/// <paramref name="str"/>, only the characters up to the end of the string are examined.
/// A negative <paramref name="start"/> is not validated and surfaces as an exception from
/// the string indexer.</remarks>
/// <param name="str">String containing sub-string to be checked.</param>
/// <param name="start">Start index of sub-string.</param>
/// <param name="len">Length of sub-string.</param>
/// <returns><c>true</c> if at least one character is selected and every selected character
/// passes <see cref="IsNcNmTokenType"/>; <c>false</c> otherwise, including when
/// <paramref name="str"/> is <c>null</c>, <paramref name="len"/> is not positive, or
/// <paramref name="start"/> lies at or beyond the end of the string.</returns>
public static bool IsNcNmToken(string str, int start, int len)
{
    if (str == null || len <= 0)
    {
        return false;
    }

    // A start offset at or past the end selects no characters. Without this check
    // the loop below would not run and the empty selection would be reported as
    // a valid token.
    if (start >= str.Length)
    {
        return false;
    }

    int endIndx = start + len;
    for (int indx = start; indx < str.Length && indx != endIndx; indx++)
    {
        if (!IsNcNmTokenType(new CharStruct(str[indx])))
        {
            return false;
        }
    }

    return true;
}
/// <summary>Checks whether a range of a character array is a valid XML name conforming
/// to the "Namespaces Constraints" (an NCName).</summary>
/// <param name="chars">Character array containing string to be checked.</param>
/// <param name="start">Start index of string.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if the first character passes <see cref="IsNcNameStartType"/> and
/// the remaining characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise.</returns>
public static bool IsNcName(char[] chars, int start, int len)
{
    if (len <= 0)
    {
        return false;
    }

    bool startsCorrectly = IsNcNameStartType(new CharStruct(chars[start]));
    if (!startsCorrectly)
    {
        return false;
    }

    // A single valid start character is a complete name; otherwise validate the tail.
    int tailLength = len - 1;
    return tailLength == 0 || IsNcNmToken(chars, start + 1, tailLength);
}
/// <summary>Returns character type for XML character processing.</summary>
/// <remarks>Useful for character and name checking routines.</remarks>
/// <param name="cs">Character to classify.</param>
/// <returns>The <see cref="CharType"/> assigned to the character.</returns>
public static CharType GetCharType(CharStruct cs)
{
    // Characters whose high byte is zero are classified by table lookup.
    if (cs.Hi == 0)
    {
        return Latin1ByteTypes[cs.Lo];
    }

    // High bytes 0xD8-0xDB: first half of a surrogate pair.
    if (cs.Hi >= 0xD8 && cs.Hi <= 0xDB)
    {
        return CharType.LEAD4;
    }

    // High bytes 0xDC-0xDF: second half of a surrogate pair.
    if (cs.Hi >= 0xDC && cs.Hi <= 0xDF)
    {
        return CharType.TRAIL;
    }

    // 0xFFFE and 0xFFFF are not XML characters.
    if (cs.Hi == 0xFF && (cs.Lo == 0xFF || cs.Lo == 0xFE))
    {
        return CharType.NONXML;
    }

    return CharType.NONASCII;
}
/// <overloads>
/// <summary>Checks if a string of characters is a well-formed XML name token.</summary>
/// <returns><c>true</c> if string is a valid XML name token, <c>false</c> otherwise.</returns>
/// <seealso href="http://www.w3.org/TR/REC-xml/#NT-Nmtoken">NmToken production on w3.org.</seealso>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the character
/// classification and the name-token test are written out inline.</remarks>
/// <param name="nmTokPtr">Pointer to first character in string to be checked.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if <paramref name="len"/> is positive and all characters are
/// name-token characters; <c>false</c> otherwise.</returns>
public static unsafe bool IsNmToken(char* nmTokPtr, int len)
{
    if (len <= 0)
    {
        return false;
    }

    char* stop = nmTokPtr + len;
    for (char* cursor = nmTokPtr; cursor != stop; cursor++)
    {
        CharStruct cs = new CharStruct(*cursor);

        // inlined call to GetCharType(cs)
        CharType ct;
        if (cs.Hi == 0)
        {
            ct = Latin1ByteTypes[cs.Lo];
        }
        else if (cs.Hi >= 0xD8 && cs.Hi <= 0xDB)
        {
            ct = CharType.LEAD4;
        }
        else if (cs.Hi >= 0xDC && cs.Hi <= 0xDF)
        {
            ct = CharType.TRAIL;
        }
        else if (cs.Hi == 0xFF && (cs.Lo == 0xFF || cs.Lo == 0xFE))
        {
            ct = CharType.NONXML;
        }
        else
        {
            ct = CharType.NONASCII;
        }

        // inlined call to IsNmTokenType(cs)
        switch (ct)
        {
            case CharType.NMSTRT:
            case CharType.HEX:
            case CharType.DIGIT:
            case CharType.NAME:
            case CharType.MINUS:
            case CharType.COLON:
                continue; // acceptable, move on to the next character

            case CharType.NONASCII:
                // Page selected by the high byte, entry by the top three bits of
                // the low byte, bit by its low five bits.
                if ((NamingBitmap[(NamePages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) == 0)
                {
                    return false;
                }
                continue;

            default:
                return false;
        }
    }

    return true;
}
/// <summary>Determines whether a character is acceptable as an XML name start
/// character conforming to the "Namespaces Constraints".</summary>
/// <remarks>Identical to <see cref="IsNameStartType"/> except
/// that colons are not allowed.</remarks>
/// <param name="cs">Character to classify.</param>
/// <returns><c>true</c> if the character may start an NCName, <c>false</c> otherwise.</returns>
/// <seealso href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCName production on w3.org.</seealso>
/// <seealso href="http://w3.org/TR/REC-xml-names/#Conformance">Namespaces Constraints on w3.org.</seealso>
public static bool IsNcNameStartType(CharStruct cs)
{
    CharType kind = GetCharType(cs);

    if (kind == CharType.NONASCII)
    {
        // NmStartPages maps the high byte to a bitmap page; a page spans 8 entries
        // of NamingBitmap (<< 3), the top three bits of the low byte select the
        // entry and the low five bits select the bit inside it.
        return (NamingBitmap[(NmStartPages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) != 0;
    }

    // Every other type, the colon included, is rejected.
    return kind == CharType.NMSTRT || kind == CharType.HEX;
}
/// <summary>Checks whether a string is a valid XML name conforming to the
/// "Namespaces Constraints" (an NCName).</summary>
/// <param name="str">String to be checked.</param>
/// <returns><c>true</c> if the first character passes <see cref="IsNcNameStartType"/> and
/// all remaining characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise,
/// including when <paramref name="str"/> is <c>null</c> or empty.</returns>
public static bool IsNcName(string str)
{
    // Reject null together with the empty string; previously a null argument
    // caused a NullReferenceException when the first character was read.
    if (str == null || str.Length == 0)
    {
        return false;
    }

    CharStruct first = new CharStruct(str[0]);
    if (!IsNcNameStartType(first))
    {
        return false;
    }

    if (str.Length == 1)
    {
        // A lone valid start character is already a complete name.
        return true;
    }

    return IsNcNmToken(str, 1, str.Length - 1);
}
/// <summary>Checks whether a range of a character array is a valid XML name conforming
/// to the "Namespaces Constraints" (an NCName).</summary>
/// <param name="chars">Character array containing string to be checked.</param>
/// <param name="start">Start index of string.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if the first character passes <see cref="IsNcNameStartType"/> and
/// the remaining characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise.</returns>
public static bool IsNcName(char[] chars, int start, int len)
{
    if (len <= 0)
    {
        return false;
    }

    CharStruct first = new CharStruct(chars[start]);
    if (!IsNcNameStartType(first))
    {
        return false;
    }

    // One character long: the start character alone is the whole name.
    if (len == 1)
    {
        return true;
    }

    return IsNcNmToken(chars, start + 1, len - 1);
}
/// <overloads>
/// <summary>Checks if a string of characters is a valid XML name
/// conforming to the "Namespaces Constraints".</summary>
/// <remarks>This applies to <see href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCNames</see>
/// but not to <see href="http://w3.org/TR/REC-xml-names/#ns-qualnames">Qualified Names</see>,
/// so prefixes separated by a colon are not allowed.</remarks>
/// <seealso href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCName production on w3.org.</seealso>
/// <seealso href="http://w3.org/TR/REC-xml-names/#Conformance">Namespaces Constraints on w3.org.</seealso>
/// <returns><c>true</c> if string is a valid XML name, <c>false</c> otherwise.</returns>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the classification of
/// the first character and the name-start test are written out inline.</remarks>
/// <param name="namePtr">Pointer to first character in string to be checked.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if the first character may start an NCName and the remaining
/// characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise.</returns>
public static unsafe bool IsNcName(char* namePtr, int len)
{
    if (len <= 0)
    {
        return false;
    }

    CharStruct cs = new CharStruct(*namePtr);

    // inlined call to GetCharType(cs)
    CharType ct;
    if (cs.Hi == 0)
    {
        ct = Latin1ByteTypes[cs.Lo];
    }
    else if (cs.Hi >= 0xD8 && cs.Hi <= 0xDB)
    {
        ct = CharType.LEAD4;
    }
    else if (cs.Hi >= 0xDC && cs.Hi <= 0xDF)
    {
        ct = CharType.TRAIL;
    }
    else if (cs.Hi == 0xFF && (cs.Lo == 0xFF || cs.Lo == 0xFE))
    {
        ct = CharType.NONXML;
    }
    else
    {
        ct = CharType.NONASCII;
    }

    // inlined call to IsNcNameStartType(cs)
    bool validStart;
    if (ct == CharType.NONASCII)
    {
        // Page selected by the high byte, entry by the top three bits of the
        // low byte, bit by its low five bits.
        validStart = (NamingBitmap[(NmStartPages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) != 0;
    }
    else
    {
        // The colon and every other type are rejected.
        validStart = ct == CharType.NMSTRT || ct == CharType.HEX;
    }

    if (!validStart)
    {
        return false;
    }

    // A single valid start character is a complete name; otherwise validate the tail.
    return len == 1 || IsNcNmToken(namePtr + 1, len - 1);
}
/// <summary>Returns character type for XML character processing.</summary>
/// <remarks>Useful for character and name checking routines.</remarks>
/// <param name="cs">Character to classify.</param>
/// <returns>The <see cref="CharType"/> assigned to the character.</returns>
public static CharType GetCharType(CharStruct cs)
{
    // Characters whose high byte is zero are classified by table lookup.
    if (cs.Hi == 0)
    {
        return Latin1ByteTypes[cs.Lo];
    }

    switch (cs.Hi)
    {
        // First half of a surrogate pair.
        case 0xD8:
        case 0xD9:
        case 0xDA:
        case 0xDB:
            return CharType.LEAD4;

        // Second half of a surrogate pair.
        case 0xDC:
        case 0xDD:
        case 0xDE:
        case 0xDF:
            return CharType.TRAIL;

        // 0xFFFE and 0xFFFF are not XML characters; the rest of the page is ordinary.
        case 0xFF:
            bool isNonCharacter = cs.Lo == 0xFF || cs.Lo == 0xFE;
            return isNonCharacter ? CharType.NONXML : CharType.NONASCII;

        default:
            return CharType.NONASCII;
    }
}
/// <summary>Checks whether a sub-string consists only of characters acceptable for an
/// XML name token.</summary>
/// <remarks>If <paramref name="start"/> plus <paramref name="len"/> runs past the end of
/// <paramref name="str"/>, only the characters up to the end of the string are examined.
/// A negative <paramref name="start"/> is not validated and surfaces as an exception from
/// the string indexer.</remarks>
/// <param name="str">String containing sub-string to be checked.</param>
/// <param name="start">Start index of sub-string.</param>
/// <param name="len">Length of sub-string.</param>
/// <returns><c>true</c> if at least one character is selected and every selected character
/// passes <see cref="IsNmTokenType"/>; <c>false</c> otherwise, including when
/// <paramref name="str"/> is <c>null</c>, <paramref name="len"/> is not positive, or
/// <paramref name="start"/> lies at or beyond the end of the string.</returns>
public static bool IsNmToken(string str, int start, int len)
{
    if (str == null || len <= 0)
    {
        return false;
    }

    // A start offset at or past the end selects no characters. Without this check
    // the loop below would not run and the empty selection would be reported as
    // a valid token.
    if (start >= str.Length)
    {
        return false;
    }

    int endIndx = start + len;
    for (int indx = start; indx < str.Length && indx != endIndx; indx++)
    {
        if (!IsNmTokenType(new CharStruct(str[indx])))
        {
            return false;
        }
    }

    return true;
}
/// <summary>Checks whether a range of a character array consists only of characters
/// acceptable for an XML name token.</summary>
/// <param name="chars">Character array containing string to be checked.</param>
/// <param name="start">Start index of string.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if <paramref name="len"/> is positive and every character in the
/// range passes <see cref="IsNmTokenType"/>; <c>false</c> otherwise.</returns>
public static bool IsNmToken(char[] chars, int start, int len)
{
    // An empty range is never a valid token.
    if (len <= 0)
    {
        return false;
    }

    int stop = start + len;
    int pos = start;
    while (pos < stop)
    {
        CharStruct current = new CharStruct(chars[pos]);
        if (!IsNmTokenType(current))
        {
            return false;
        }
        pos++;
    }

    return true;
}
/// <overloads>
/// <summary>Checks if a string of characters is a valid XML name
/// conforming to the "Namespaces Constraints".</summary>
/// <remarks>This applies to <see href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCNames</see>
/// but not to <see href="http://w3.org/TR/REC-xml-names/#ns-qualnames">Qualified Names</see>,
/// so prefixes separated by a colon are not allowed.</remarks>
/// <seealso href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCName production on w3.org.</seealso>
/// <seealso href="http://w3.org/TR/REC-xml-names/#Conformance">Namespaces Constraints on w3.org.</seealso>
/// <returns><c>true</c> if string is a valid XML name, <c>false</c> otherwise.</returns>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the classification of
/// the first character and the name-start test are written out inline.</remarks>
/// <param name="namePtr">Pointer to first character in string to be checked.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if the first character may start an NCName and the remaining
/// characters are accepted by <c>IsNcNmToken</c>; <c>false</c> otherwise.</returns>
public static unsafe bool IsNcName(char* namePtr, int len)
{
    if (len <= 0)
    {
        return false;
    }

    CharStruct cs = new CharStruct(*namePtr);

    // inlined call to GetCharType(cs)
    CharType ct;
    if (cs.Hi == 0)
    {
        ct = Latin1ByteTypes[cs.Lo];
    }
    else
    {
        switch (cs.Hi)
        {
            case 0xD8:
            case 0xD9:
            case 0xDA:
            case 0xDB:
                ct = CharType.LEAD4;
                break;

            case 0xDC:
            case 0xDD:
            case 0xDE:
            case 0xDF:
                ct = CharType.TRAIL;
                break;

            case 0xFF:
                ct = (cs.Lo == 0xFF || cs.Lo == 0xFE) ? CharType.NONXML : CharType.NONASCII;
                break;

            default:
                ct = CharType.NONASCII;
                break;
        }
    }

    // inlined call to IsNcNameStartType(cs)
    switch (ct)
    {
        case CharType.NMSTRT:
        case CharType.HEX:
            break; // acceptable start character

        case CharType.NONASCII:
            // Page selected by the high byte, entry by the top three bits of the
            // low byte, bit by its low five bits.
            if ((NamingBitmap[(NmStartPages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) == 0)
            {
                return false;
            }
            break;

        case CharType.COLON: // colons are not allowed in an NCName
        default:
            return false;
    }

    // The start character alone is a complete name.
    if (len == 1)
    {
        return true;
    }

    return IsNcNmToken(namePtr + 1, len - 1);
}
/// <overloads>
/// <summary>Checks if a string of characters is a well-formed XML name token
/// conforming to the "Namespaces Constraints".</summary>
/// <returns><c>true</c> if string is a valid XML name token, <c>false</c> otherwise.</returns>
/// <seealso href="http://w3.org/TR/REC-xml-names/#NT-NCName">NCNameChar Production on w3.org.</seealso>
/// <seealso href="http://w3.org/TR/REC-xml-names/#Conformance">Namespaces Constraints on w3.org.</seealso>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the character
/// classification and the name-token test are written out inline.</remarks>
/// <param name="nmTokPtr">Pointer to first character in string to be checked.</param>
/// <param name="len">Length of string.</param>
/// <returns><c>true</c> if <paramref name="len"/> is positive and all characters are
/// name-token characters other than the colon; <c>false</c> otherwise.</returns>
public static unsafe bool IsNcNmToken(char* nmTokPtr, int len)
{
    if (len <= 0)
    {
        return false;
    }

    char* stop = nmTokPtr + len;
    for (char* cursor = nmTokPtr; cursor != stop; cursor++)
    {
        CharStruct cs = new CharStruct(*cursor);

        // inlined call to GetCharType(cs)
        CharType ct;
        if (cs.Hi == 0)
        {
            ct = Latin1ByteTypes[cs.Lo];
        }
        else if (cs.Hi >= 0xD8 && cs.Hi <= 0xDB)
        {
            ct = CharType.LEAD4;
        }
        else if (cs.Hi >= 0xDC && cs.Hi <= 0xDF)
        {
            ct = CharType.TRAIL;
        }
        else if (cs.Hi == 0xFF && (cs.Lo == 0xFF || cs.Lo == 0xFE))
        {
            ct = CharType.NONXML;
        }
        else
        {
            ct = CharType.NONASCII;
        }

        // inlined call to IsNcNmTokenType(cs)
        switch (ct)
        {
            case CharType.NMSTRT:
            case CharType.HEX:
            case CharType.DIGIT:
            case CharType.NAME:
            case CharType.MINUS:
                continue; // acceptable, move on to the next character

            case CharType.NONASCII:
                // Page selected by the high byte, entry by the top three bits of
                // the low byte, bit by its low five bits.
                if ((NamingBitmap[(NamePages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) == 0)
                {
                    return false;
                }
                continue;

            case CharType.COLON: // colons are not allowed under the Namespaces Constraints
            default:
                return false;
        }
    }

    return true;
}
/// <summary>Checks whether every character of a string is acceptable for an
/// XML name token.</summary>
/// <param name="str">String to be checked.</param>
/// <returns><c>true</c> if <paramref name="str"/> is non-empty and all of its characters
/// pass <see cref="IsNmTokenType"/>; <c>false</c> otherwise, including when
/// <paramref name="str"/> is <c>null</c> or empty.</returns>
public static bool IsNmToken(string str)
{
    // A null reference is treated like an empty string (not a token) instead of
    // failing with a NullReferenceException further down.
    if (str == null || str.Length == 0)
    {
        return false;
    }

    foreach (char ch in str)
    {
        if (!IsNmTokenType(new CharStruct(ch)))
        {
            return false;
        }
    }

    return true;
}
/// <overloads>
/// <summary>Checks if UTF-16 encoded string contains valid XML characters.</summary>
/// <remarks>If the return value indicates an invalid character then this means
/// that either a complete but invalid character was found, or that there are not
/// enough bytes left to form a complete character. If the second part of a surrogate
/// pair is invalid or missing then the return value points to the first part.</remarks>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the character
/// classification is written out inline.</remarks>
/// <param name="strPtr">Pointer to first character in string.</param>
/// <param name="len">Length of string.</param>
/// <returns>Pointer to first invalid character, or <c>null</c> if string valid
/// (also <c>null</c> when <paramref name="len"/> is not positive).</returns>
public static unsafe char* CheckStringValid(char* strPtr, int len)
{
    if (len <= 0)
    {
        return null;
    }

    char* endPtr = strPtr + len;
    bool awaitingTrail = false; // true right after the first half of a surrogate pair

    for (char* cursor = strPtr; cursor < endPtr; cursor++)
    {
        CharStruct cs = new CharStruct(*cursor);

        // inlined call to GetCharType(cs)
        CharType ct;
        if (cs.Hi == 0)
        {
            ct = Latin1ByteTypes[cs.Lo];
        }
        else if (cs.Hi >= 0xD8 && cs.Hi <= 0xDB)
        {
            ct = CharType.LEAD4;
        }
        else if (cs.Hi >= 0xDC && cs.Hi <= 0xDF)
        {
            ct = CharType.TRAIL;
        }
        else if (cs.Hi == 0xFF && (cs.Lo == 0xFF || cs.Lo == 0xFE))
        {
            ct = CharType.NONXML;
        }
        else
        {
            ct = CharType.NONASCII;
        }

        if (awaitingTrail)
        {
            if (ct != CharType.TRAIL)
            {
                // Broken pair: report the position of its first half.
                return cursor - 1;
            }

            awaitingTrail = false;
            continue;
        }

        if (ct == CharType.LEAD4)
        {
            // The second half of the pair must still fit into the buffer.
            if ((endPtr - cursor) < 2)
            {
                return cursor;
            }

            awaitingTrail = true;
        }
        else if (ct == CharType.NONXML || ct == CharType.MALFORM || ct == CharType.TRAIL)
        {
            // Not an XML character, or a second surrogate half without a first.
            return cursor;
        }
    }

    return null;
}
/// <summary>Determines whether a character is acceptable for XML name tokens.</summary>
/// <param name="cs">Character to classify.</param>
/// <returns><c>true</c> if the character is acceptable, <c>false</c> otherwise.</returns>
/// <seealso href="http://www.w3.org/TR/REC-xml/#NT-Nmtoken">NmToken Production on w3.org.</seealso>
public static bool IsNmTokenType(CharStruct cs)
{
    CharType kind = GetCharType(cs);

    if (kind == CharType.NONASCII)
    {
        // NamePages maps the high byte to a bitmap page; a page spans 8 entries
        // of NamingBitmap (<< 3), the top three bits of the low byte select the
        // entry and the low five bits select the bit inside it.
        return (NamingBitmap[(NamePages[cs.Hi] << 3) + (cs.Lo >> 5)] & (1 << (cs.Lo & 0x1F))) != 0;
    }

    return kind == CharType.NMSTRT
        || kind == CharType.HEX
        || kind == CharType.DIGIT
        || kind == CharType.NAME
        || kind == CharType.MINUS
        || kind == CharType.COLON;
}
/// <overloads>
/// <summary>Checks if UTF-16 encoded string contains valid XML characters.</summary>
/// <remarks>If the return value indicates an invalid character then this means
/// that either a complete but invalid character was found, or that there are not
/// enough bytes left to form a complete character. If the second part of a surrogate
/// pair is invalid or missing then the return value points to the first part.</remarks>
/// </overloads>
/// <remarks>Contains <c>unsafe</c> code tuned for performance: the character
/// classification is written out inline.</remarks>
/// <param name="strPtr">Pointer to first character in string.</param>
/// <param name="len">Length of string.</param>
/// <returns>Pointer to first invalid character, or <c>null</c> if string valid
/// (also <c>null</c> when <paramref name="len"/> is not positive).</returns>
public static unsafe char* CheckStringValid(char* strPtr, int len)
{
    if (len <= 0)
    {
        return null;
    }

    char* limit = strPtr + len;
    bool insidePair = false; // set after the first half of a surrogate pair was accepted

    while (strPtr < limit)
    {
        CharStruct cs = new CharStruct(*strPtr);

        // inlined call to GetCharType(cs)
        CharType ct;
        if (cs.Hi == 0)
        {
            ct = Latin1ByteTypes[cs.Lo];
        }
        else
        {
            switch (cs.Hi)
            {
                case 0xD8:
                case 0xD9:
                case 0xDA:
                case 0xDB:
                    ct = CharType.LEAD4;
                    break;

                case 0xDC:
                case 0xDD:
                case 0xDE:
                case 0xDF:
                    ct = CharType.TRAIL;
                    break;

                case 0xFF:
                    ct = (cs.Lo == 0xFF || cs.Lo == 0xFE) ? CharType.NONXML : CharType.NONASCII;
                    break;

                default:
                    ct = CharType.NONASCII;
                    break;
            }
        }

        if (insidePair)
        {
            // Anything but a second half breaks the pair: report its first half.
            if (ct != CharType.TRAIL)
            {
                return strPtr - 1;
            }

            insidePair = false;
            strPtr++;
            continue;
        }

        // A second surrogate half without a first, or a non-XML character.
        if (ct == CharType.NONXML || ct == CharType.MALFORM || ct == CharType.TRAIL)
        {
            return strPtr;
        }

        if (ct == CharType.LEAD4)
        {
            // The second half of the pair must still fit into the buffer.
            if ((limit - strPtr) < 2)
            {
                return strPtr;
            }

            insidePair = true;
        }

        strPtr++;
    }

    return null;
}