Пример #1
0
 /// <summary>
 /// Determines whether a UTF-16 surrogate pair encodes a code point allowed by the nameStartChar production of the Turtle specification.
 /// </summary>
 /// <param name="c">High surrogate.</param>
 /// <param name="d">Low surrogate.</param>
 /// <returns>True if the arguments form a high/low surrogate pair whose code point lies between 0x10000 and 0xEFFFF inclusive, false otherwise.</returns>
 public static bool IsNameStartChar(char c, char d)
 {
     // Anything other than a well formed high/low pair cannot match
     if (!UnicodeSpecsHelper.IsHighSurrogate(c) || !UnicodeSpecsHelper.IsLowSurrogate(d))
     {
         return false;
     }

     // Only the supplementary range listed by the production is accepted
     int utf32 = UnicodeSpecsHelper.ConvertToUtf32(c, d);
     return 0x10000 <= utf32 && utf32 <= 0xeffff;
 }
Пример #2
0
        /// <summary>
        /// Determines whether a string is acceptable as the local name part of a Turtle prefixed name under the given syntax.
        /// </summary>
        /// <param name="value">Candidate local name.</param>
        /// <param name="syntax">Turtle syntax whose grammar the candidate is checked against.</param>
        /// <returns>True if the candidate is empty or passes the checks for the selected syntax, false otherwise.</returns>
        public static bool IsValidLocalName(String value, TurtleSyntax syntax)
        {
            char[] chars = value.ToCharArray();

            // The empty local name is accepted regardless of syntax
            if (chars.Length == 0)
            {
                return true;
            }

            if (syntax == TurtleSyntax.W3C)
            {
                // PNAME_LN ::= PNAME_NS PN_LOCAL
                // PNAME_NS ::= PN_PREFIX? ':'

                // Treat the value as a sequence of ':' separated segments
                String[] segments = value.Split(':');
                int lastIndex = segments.Length - 1;

                // Leading segments are checked as prefixes until one is found that can only be the start of
                // the local name (it contains an escape starter or begins with a digit).
                // Empty segments are stepped over without any check.
                int index = 0;
                while (index < lastIndex)
                {
                    String segment = segments[index];
                    if (segment.Length > 0)
                    {
                        bool startsLocalName = segment.Contains("%") || segment.Contains("\\") || Char.IsDigit(segment[0]);
                        if (startsLocalName)
                        {
                            break;
                        }
                        if (!IsPNPrefix(segment, syntax))
                        {
                            return false;
                        }
                    }
                    index++;
                }

                // Stopping early means the local name spans all remaining segments,
                // so the ':' characters removed by the split are put back
                String local = index < lastIndex
                    ? String.Join(":", segments, index, segments.Length - index)
                    : segments[lastIndex];

                // An empty remainder (the value ended with ':') is accepted, anything else must be a PN_LOCAL
                return local.Length == 0 || IsPNLocal(local);
            }

            // name ::= nameStartChar nameChar*

            // First character, or first surrogate pair, must be a nameStartChar
            int next = 1;
            if (!IsNameStartChar(chars[0]))
            {
                if (!UnicodeSpecsHelper.IsHighSurrogate(chars[0]) || chars.Length <= 1)
                {
                    return false;
                }
                if (!IsNameStartChar(chars[0], chars[1]))
                {
                    return false;
                }
                next = 2;
            }

            // Everything after that must be a nameChar, again allowing for surrogate pairs
            while (next < chars.Length)
            {
                if (!IsNameChar(chars[next]))
                {
                    bool pairAvailable = UnicodeSpecsHelper.IsHighSurrogate(chars[next]) && next < chars.Length - 1;
                    if (!pairAvailable || !IsNameChar(chars[next], chars[next + 1]))
                    {
                        return false;
                    }

                    // Step over the low surrogate that was consumed with its high surrogate
                    next++;
                }
                next++;
            }
            return true;
        }
Пример #3
0
        /// <summary>
        /// Gets whether the given value matches the PN_LOCAL rule from the Turtle specification.
        /// </summary>
        /// <param name="value">Value.</param>
        /// <returns>
        /// True if the value matches PN_LOCAL, false otherwise. The empty string never matches because the
        /// rule requires at least one character.
        /// </returns>
        public static bool IsPNLocal(String value)
        {
            // PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?

            char[] cs = value.ToCharArray();

            // PN_LOCAL needs at least one character; without this guard the cs[0] access below
            // would throw an IndexOutOfRangeException for an empty value
            if (cs.Length == 0)
            {
                return false;
            }

            // start is the index of the first character still to be checked after the leading one,
            // temp receives the out value from IsPLX for a leading escape
            int start = 1, temp = 0;

            // Validate first character
            if (cs[0] != ':' && !Char.IsDigit(cs[0]) && !IsPLX(cs, 0, out temp) && !IsPNCharsU(cs[0]))
            {
                // Handle surrogate pairs for UTF-32 characters
                if (UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1)
                {
                    if (!IsPNCharsU(cs[0], cs[1]))
                    {
                        return false;
                    }
                    // The pair occupies two chars so checking resumes after the low surrogate
                    start++;
                }
                else
                {
                    return false;
                }
            }
            // We may have seen a PLX as the first thing so need to correct start appropriately
            // NOTE(review): this treats the IsPLX out value as the index of the last character of the escape - confirm against IsPLX
            if (temp > 0)
            {
                start = temp + 1;
            }

            // The leading character, pair or escape was the whole value
            if (start >= cs.Length)
            {
                return true;
            }

            // Intermediate characters can be PN_CHARS, a '.', a ':' or a PLX
            // The final char is deliberately left out of the loop as it has a stricter rule (no '.')
            for (int i = start; i < cs.Length - 1; i++)
            {
                // j only ends up different from i when IsPLX matched an escape starting at i
                int j = i;
                if (cs[i] != '.' && cs[i] != ':' && !IsPNChars(cs[i]) && !IsPLX(cs, i, out j))
                {
                    // Handle surrogate pairs for UTF-32 characters
                    if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i < cs.Length - 2)
                    {
                        if (!IsPNChars(cs[i], cs[i + 1]))
                        {
                            return false;
                        }
                        // Skip the low surrogate and resync j so the PLX handling below is not triggered
                        i++;
                        j = i;
                    }
                    else if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i == cs.Length - 2)
                    {
                        // This handles the case where the final character is a UTF-32 character represented by a surrogate pair
                        return IsPNChars(cs[i], cs[i + 1]);
                    }
                    else
                    {
                        return false;
                    }
                }
                if (i != j)
                {
                    // This means we just saw a PLX
                    // Last thing being a PLX is valid
                    if (j == cs.Length - 1)
                    {
                        return true;
                    }
                    // Otherwise adjust the index appropriately and continue checking further characters
                    i = j;
                }
            }

            // Final character is a ':' or a PN_CHARS
            return cs[cs.Length - 1] == ':' || IsPNChars(cs[cs.Length - 1]);
        }
Пример #4
0
        /// <summary>
        /// Determines whether a string is acceptable as the prefix part of a Turtle prefixed name under the given syntax.
        /// </summary>
        /// <param name="value">Candidate prefix.</param>
        /// <param name="syntax">Turtle syntax whose grammar the candidate is checked against.</param>
        /// <returns>True if the candidate is empty or passes the checks for the selected syntax, false otherwise.</returns>
        public static bool IsPNPrefix(String value, TurtleSyntax syntax)
        {
            char[] chars = value.ToCharArray();

            // The empty prefix is accepted regardless of syntax
            if (chars.Length == 0)
            {
                return true;
            }

            int last = chars.Length - 1;

            if (syntax == TurtleSyntax.W3C)
            {
                // PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?

                // Leading character, or leading surrogate pair, must be in PN_CHARS_BASE
                int index = 1;
                if (!IsPNCharsBase(chars[0]))
                {
                    if (!UnicodeSpecsHelper.IsHighSurrogate(chars[0]) || chars.Length <= 1)
                    {
                        return false;
                    }
                    if (!IsPNCharsBase(chars[0], chars[1]))
                    {
                        return false;
                    }
                    index = 2;
                }

                // Nothing follows the leading character or pair
                if (index == chars.Length)
                {
                    return true;
                }

                // Everything before the final char may be a '.' or a PN_CHARS
                while (index < last)
                {
                    char current = chars[index];
                    if (current != '.' && !IsPNChars(current))
                    {
                        // The only other acceptable thing here is a surrogate pair in PN_CHARS
                        if (!UnicodeSpecsHelper.IsHighSurrogate(current))
                        {
                            return false;
                        }

                        bool pairAllowed = IsPNChars(current, chars[index + 1]);
                        if (index == last - 1)
                        {
                            // The pair is the final character of the prefix so it decides the result
                            return pairAllowed;
                        }
                        if (!pairAllowed)
                        {
                            return false;
                        }

                        // Step over the low surrogate that was consumed with its high surrogate
                        index++;
                    }
                    index++;
                }

                // The final char must be in PN_CHARS, so a trailing '.' is rejected
                return IsPNChars(chars[last]);
            }

            // prefixName ::= ( nameStartChar - '_' ) nameChar*

            // Leading character must be a nameStartChar other than '_', or a surrogate pair that is a nameStartChar
            int position = 1;
            if (!IsNameStartChar(chars[0]) || chars[0] == '_')
            {
                if (!UnicodeSpecsHelper.IsHighSurrogate(chars[0]) || chars.Length <= 1)
                {
                    return false;
                }
                if (!IsNameStartChar(chars[0], chars[1]))
                {
                    return false;
                }
                position = 2;
            }

            // Everything after that must be a nameChar, again allowing for surrogate pairs
            while (position < chars.Length)
            {
                if (!IsNameChar(chars[position]))
                {
                    bool pairAvailable = UnicodeSpecsHelper.IsHighSurrogate(chars[position]) && position < last;
                    if (!pairAvailable || !IsNameChar(chars[position], chars[position + 1]))
                    {
                        return false;
                    }

                    // Step over the low surrogate that was consumed with its high surrogate
                    position++;
                }
                position++;
            }
            return true;
        }