Ejemplo n.º 1
0
 /// <summary>
 /// Determines whether the code point encoded by a surrogate pair lies in the supplementary range accepted by the nameStartChar production from the Turtle specification.
 /// </summary>
 /// <param name="c">First character of the pair, expected to be a high surrogate.</param>
 /// <param name="d">Second character of the pair, expected to be a low surrogate.</param>
 /// <returns>True if the arguments form a high/low surrogate pair whose combined code point is between 0x10000 and 0xEFFFF inclusive, false otherwise.</returns>
 public static bool IsNameStartChar(char c, char d)
 {
     // Anything that is not a high surrogate followed by a low surrogate cannot match
     if (!UnicodeSpecsHelper.IsHighSurrogate(c) || !UnicodeSpecsHelper.IsLowSurrogate(d))
     {
         return false;
     }

     // Combine the pair and test the result against the permitted range
     int combined = UnicodeSpecsHelper.ConvertToUtf32(c, d);
     return 0x10000 <= combined && combined <= 0xeffff;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Tests a single character against the ucschar production.
 /// </summary>
 /// <param name="c">Character to test.</param>
 /// <returns>The result of <c>UnicodeSpecsHelper.IsLetterOrDigit</c> for the character.</returns>
 /// <remarks>
 /// The official ucschar production permits character codes beyond the range of the .Net char type, so not every string that matches that production will be matched by this function.
 /// </remarks>
 public static bool IsUcsChar(char c)
 {
     bool isLetterOrDigit = UnicodeSpecsHelper.IsLetterOrDigit(c);
     return isLetterOrDigit;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks whether a value is acceptable as the local name portion of a Turtle prefixed name under the given syntax.
        /// </summary>
        /// <param name="value">Candidate local name.</param>
        /// <param name="syntax">Turtle syntax whose grammar should be applied.</param>
        /// <returns>True if the value is empty or satisfies the local name rules of the selected syntax, false otherwise.</returns>
        public static bool IsValidLocalName(String value, TurtleSyntax syntax)
        {
            // The empty local name is valid under every syntax
            if (value.Length == 0)
            {
                return true;
            }

            if (syntax == TurtleSyntax.W3C)
            {
                // PNAME_LN	::=	PNAME_NS PN_LOCAL
                // PNAME_NS	::=	PN_PREFIX? ':'

                // The value is viewed as a run of namespace segments separated by ':'
                String[] segments = value.Split(':');
                int lastIndex = segments.Length - 1;

                // Walk the non-final segments; each must be a valid PN_PREFIX until we reach one
                // that can only belong to the local name (the local name itself may contain bare ':')
                int localStart = 0;
                while (localStart < lastIndex)
                {
                    String segment = segments[localStart];
                    if (segment.Length > 0)
                    {
                        // An escape sequence starter or a leading digit marks the start of the local name
                        bool beginsLocalName = segment.Contains("%") || segment.Contains("\\") || Char.IsDigit(segment[0]);
                        if (beginsLocalName)
                        {
                            break;
                        }

                        // Otherwise the segment has to be a valid prefix
                        if (!IsPNPrefix(segment, syntax))
                        {
                            return false;
                        }
                    }
                    localStart++;
                }

                // If we stopped early the local name is everything from that segment onwards,
                // re-joined with the ':' separators that the split removed
                String localPart = localStart < lastIndex
                    ? String.Join(":", segments, localStart, segments.Length - localStart)
                    : segments[lastIndex];

                // An empty remainder is valid (the value ended in ':'), anything else must match PN_LOCAL
                return localPart.Length == 0 || IsPNLocal(localPart);
            }

            // name	::=	nameStartChar nameChar*
            char[] chars = value.ToCharArray();
            int next;

            // The name must open with a nameStartChar, given either as a single char or as a surrogate pair
            if (IsNameStartChar(chars[0]))
            {
                next = 1;
            }
            else if (UnicodeSpecsHelper.IsHighSurrogate(chars[0]) && chars.Length > 1 && IsNameStartChar(chars[0], chars[1]))
            {
                next = 2;
            }
            else
            {
                return false;
            }

            // Everything after that must be a nameChar, again allowing surrogate pairs
            while (next < chars.Length)
            {
                if (IsNameChar(chars[next]))
                {
                    next++;
                    continue;
                }

                bool pairAvailable = UnicodeSpecsHelper.IsHighSurrogate(chars[next]) && next < chars.Length - 1;
                if (!pairAvailable || !IsNameChar(chars[next], chars[next + 1]))
                {
                    return false;
                }

                // Step over both halves of the surrogate pair
                next += 2;
            }
            return true;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Gets whether the given value matches the PN_LOCAL rule from the Turtle specification.
        /// </summary>
        /// <param name="value">Value to test.</param>
        /// <returns>True if the value matches PN_LOCAL; false otherwise, including when the value is null or empty since the rule requires at least one character.</returns>
        public static bool IsPNLocal(String value)
        {
            // PN_LOCAL	::=	(PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?

            // The first component of the rule is mandatory so a null or empty value can never match,
            // rejecting it here also prevents indexing past the end of an empty array below
            if (String.IsNullOrEmpty(value))
            {
                return(false);
            }

            char[] cs = value.ToCharArray();
            // start is the index of the first character not consumed by the leading component,
            // temp receives the out value from IsPLX when the leading component is tested as a PLX
            int    start = 1, temp = 0;

            // Validate first character
            if (cs[0] != ':' && !Char.IsDigit(cs[0]) && !IsPLX(cs, 0, out temp) && !IsPNCharsU(cs[0]))
            {
                // Handle surrogate pairs for UTF-32 characters
                if (UnicodeSpecsHelper.IsHighSurrogate(cs[0]) && cs.Length > 1)
                {
                    if (!IsPNCharsU(cs[0], cs[1]))
                    {
                        return(false);
                    }
                    // The pair occupies two chars
                    start++;
                }
                else
                {
                    return(false);
                }
            }
            // We may have seen a PLX as the first thing so need to correct start appropriately
            // (a positive temp is treated as the index of the last character of that PLX)
            if (temp > 0)
            {
                start = temp + 1;
            }

            // Nothing left after the leading component so the optional tail is absent
            if (start >= cs.Length)
            {
                return(true);
            }

            // Intermediate characters can be PN_CHARS, a '.', a ':' or a PLX
            // The final character is excluded here because it is not allowed to be a '.'
            for (int i = start; i < cs.Length - 1; i++)
            {
                // j only ends up different from i when IsPLX accepts a PLX starting at i
                int j = i;
                if (cs[i] != '.' && cs[i] != ':' && !IsPNChars(cs[i]) && !IsPLX(cs, i, out j))
                {
                    // Handle surrogate pairs for UTF-32 characters
                    if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i < cs.Length - 2)
                    {
                        if (!IsPNChars(cs[i], cs[i + 1]))
                        {
                            return(false);
                        }
                        // Skip the second half of the pair and keep j in step so this is not mistaken for a PLX
                        i++;
                        j = i;
                    }
                    else if (UnicodeSpecsHelper.IsHighSurrogate(cs[i]) && i == cs.Length - 2)
                    {
                        // This handles the case where the final character is a UTF-32 character represented by a surrogate pair
                        return(IsPNChars(cs[i], cs[i + 1]));
                    }
                    else
                    {
                        return(false);
                    }
                }
                if (i != j)
                {
                    // This means we just saw a PLX
                    // Last thing being a PLX is valid
                    if (j == cs.Length - 1)
                    {
                        return(true);
                    }
                    // Otherwise adjust the index appropriately and continue checking further characters
                    i = j;
                }
            }

            // Final character is a ':' or a PN_CHARS
            return(cs[cs.Length - 1] == ':' || IsPNChars(cs[cs.Length - 1]));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks whether a value is acceptable as the prefix portion of a Turtle prefixed name under the given syntax.
        /// </summary>
        /// <param name="value">Candidate prefix.</param>
        /// <param name="syntax">Turtle syntax whose grammar should be applied.</param>
        /// <returns>True if the value is empty or satisfies the prefix rules of the selected syntax, false otherwise.</returns>
        public static bool IsPNPrefix(String value, TurtleSyntax syntax)
        {
            char[] chars = value.ToCharArray();

            // The empty prefix is accepted under every syntax
            if (chars.Length == 0)
            {
                return true;
            }

            // Index of the next character still to be checked
            int pos;

            if (syntax == TurtleSyntax.W3C)
            {
                // PN_PREFIX	::=	PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?

                // The prefix must open with a PN_CHARS_BASE, given either as a single char or as a surrogate pair
                if (IsPNCharsBase(chars[0]))
                {
                    pos = 1;
                }
                else if (UnicodeSpecsHelper.IsHighSurrogate(chars[0]) && chars.Length > 1 && IsPNCharsBase(chars[0], chars[1]))
                {
                    pos = 2;
                }
                else
                {
                    return false;
                }

                // Nothing follows the leading character so the optional tail is absent
                if (pos == chars.Length)
                {
                    return true;
                }

                // Intermediate characters must be a '.' or in PN_CHARS, the final char is handled separately
                int last = chars.Length - 1;
                while (pos < last)
                {
                    char current = chars[pos];
                    if (current == '.' || IsPNChars(current))
                    {
                        pos++;
                        continue;
                    }

                    // The only other possibility is the first half of a surrogate pair
                    if (!UnicodeSpecsHelper.IsHighSurrogate(current))
                    {
                        return false;
                    }

                    if (pos == last - 1)
                    {
                        // The pair occupies the final two chars so it alone decides the outcome
                        return IsPNChars(current, chars[pos + 1]);
                    }

                    if (!IsPNChars(current, chars[pos + 1]))
                    {
                        return false;
                    }

                    // Step over both halves of the surrogate pair
                    pos += 2;
                }

                // Final character must be in PN_CHARS
                return IsPNChars(chars[last]);
            }

            // prefixName	::=	( nameStartChar - '_' ) nameChar*

            // The prefix must open with a nameStartChar other than '_', given either as a single char or as a surrogate pair
            if (IsNameStartChar(chars[0]) && chars[0] != '_')
            {
                pos = 1;
            }
            else if (UnicodeSpecsHelper.IsHighSurrogate(chars[0]) && chars.Length > 1 && IsNameStartChar(chars[0], chars[1]))
            {
                pos = 2;
            }
            else
            {
                return false;
            }

            // Every subsequent character must be a nameChar, again allowing surrogate pairs
            while (pos < chars.Length)
            {
                if (IsNameChar(chars[pos]))
                {
                    pos++;
                    continue;
                }

                bool pairAvailable = UnicodeSpecsHelper.IsHighSurrogate(chars[pos]) && pos < chars.Length - 1;
                if (!pairAvailable || !IsNameChar(chars[pos], chars[pos + 1]))
                {
                    return false;
                }

                // Step over both halves of the surrogate pair
                pos += 2;
            }
            return true;
        }