private IToken TryGetVariable()
        {
            // Accept the leading character unconditionally; the caller is expected to have
            // established that it is the '?' which introduces a variable
            ConsumeCharacter();

            // Keep accepting characters for as long as the lookahead is a letter, digit, hyphen or underscore
            for (char lookahead = Peek();
                 Char.IsLetterOrDigit(lookahead) || UnicodeSpecsHelper.IsLetterOrDigit(lookahead) || lookahead == '-' || lookahead == '_';
                 lookahead = Peek())
            {
                ConsumeCharacter();
            }

            // Whatever was accumulated must pass the variable name check before a token is produced
            String name = Value;
            if (!IsValidVarName(name))
            {
                throw Error("The value '" + name + "' is not valid a Variable Name");
            }

            return new VariableToken(name, CurrentLine, StartPosition, EndPosition);
        }
        private IToken TryGetDataType()
        {
            // A data type is given either as a URI (starting with '<') or as a QName
            char start = Peek();

            if (start == '<')
            {
                // URI form: re-wrap the URI value in angle brackets for the data type token
                IToken uri = TryGetUri();
                LastTokenType = Token.DATATYPE;
                String wrapped = "<" + uri.Value + ">";
                return new DataTypeToken(wrapped, uri.StartLine, uri.StartPosition, uri.EndPosition);
            }

            bool startsQName = Char.IsLetter(start) || UnicodeSpecsHelper.IsLetter(start) || start == '_';
            if (!startsQName)
            {
                // Neither a URI nor a QName can begin with this character
                throw Error("Unexpected Character (Code " + (int)start + " " + start + "\nExpected a < to start a URI or a valid start character for a QName to specify Data Type");
            }

            // QName form: the QName tokeniser may yield other token types, only a QName is acceptable here
            IToken qname = TryGetQName();
            if (qname.TokenType != Token.QNAME)
            {
                throw Error("Unexpected Token '" + qname.GetType().ToString() + "' was produced when a QName for a Data Type was expected!");
            }

            LastTokenType = Token.DATATYPE;
            return new DataTypeToken(qname.Value, qname.StartLine, qname.StartPosition, qname.EndPosition);
        }
Beispiel #3
0
 /// <summary>
 /// Decodes the percent encoded escape (<c>%XX</c>) that starts at <paramref name="index"/> and advances the index past it
 /// </summary>
 /// <param name="value">String containing the escape</param>
 /// <param name="index">Position of the <c>%</c> character; on success it is moved forward by three so that it points just after the escape</param>
 /// <returns>The character produced by passing the two characters after the <c>%</c> to <c>UnicodeSpecsHelper.ConvertToChar</c></returns>
 /// <exception cref="RdfParseException">Thrown if fewer than three characters remain from <paramref name="index"/> or the character at <paramref name="index"/> is not <c>%</c></exception>
 public static char HexUnescape(String value, ref int index)
 {
     // The escape needs three characters: the % and two hex digits
     if (index + 2 >= value.Length)
     {
         throw new RdfParseException("Malformed Percent Encoded Escape");
     }
     if (value[index] != '%')
     {
         throw new RdfParseException("Malformed Percent Encoded Escape");
     }

     // Read the two digits BEFORE moving the index; advancing first would decode the
     // characters that follow the escape (or run off the end of the string)
     String hex = value.Substring(index + 1, 2);
     char unescaped = UnicodeSpecsHelper.ConvertToChar(hex);

     index = index + 3;
     return unescaped;
 }
Beispiel #4
0
        private IToken TryGetVariable()
        {
            // Accept the leading character unconditionally; the caller is expected to have
            // established that it is the '?' or '$' which introduces a variable
            ConsumeCharacter();

            // Scan while the lookahead is a letter, digit, hyphen, underscore or the start of an escape
            for (char lookahead = Peek();
                 Char.IsLetterOrDigit(lookahead) || UnicodeSpecsHelper.IsLetterOrDigit(lookahead) || lookahead == '-' || lookahead == '_' || lookahead == '\\';
                 lookahead = Peek())
            {
                if (lookahead != '\\')
                {
                    ConsumeCharacter();
                    continue;
                }

                // A backslash must introduce an escape that is legal in QName mode
                HandleEscapes(TokeniserEscapeMode.QName);
            }

            String name = Value;

            // A trailing dot is not part of the name: hand it back to the input and trim it off
            if (name.EndsWith("."))
            {
                Backtrack();
                name = name.Substring(0, name.Length - 1);
            }

            if (!SparqlSpecsHelper.IsValidVarName(name))
            {
                throw Error("The value '" + name + "' is not valid a Variable Name");
            }

            return new VariableToken(name, CurrentLine, StartPosition, EndPosition);
        }
        /// <summary>
        /// Checks whether a value is acceptable as a variable name
        /// </summary>
        /// <param name="value">Candidate name</param>
        /// <returns>
        /// True if the value matches the <c>_isValidVarName</c> pattern, or failing that if it starts with an
        /// underscore or letter and continues only with underscores, hyphens, letters and digits; false otherwise
        /// (including for an empty value that the pattern does not match)
        /// </returns>
        private bool IsValidVarName(String value)
        {
            // Fast path: the pattern accepts the value outright
            if (_isValidVarName.IsMatch(value))
            {
                return true;
            }

            // An empty value has no first character to inspect, so it cannot be valid
            // (previously this case failed with an IndexOutOfRangeException)
            if (value.Length == 0)
            {
                return false;
            }

            // Otherwise validate character by character.
            // First character must be an underscore or letter
            char first = value[0];
            if (!(first == '_' || Char.IsLetter(first) || UnicodeSpecsHelper.IsLetter(first)))
            {
                return false;
            }

            // Remaining characters must be underscores, hyphens, letters or digits
            for (int i = 1; i < value.Length; i++)
            {
                char c = value[i];
                if (!(c == '_' || c == '-' || Char.IsLetterOrDigit(c) || UnicodeSpecsHelper.IsLetterOrDigit(c)))
                {
                    return false;
                }
            }

            return true;
        }
Beispiel #6
0
        /// <summary>
        /// Handles the special SPARQL escapes that can occur in a local name
        /// </summary>
        /// <remarks>
        /// <para>
        /// Unlike <see cref="BaseTokeniser.HandleEscapes()">HandleEscapes()</see> this only unescapes unicode escapes, other escapes are simply validated and passed through for later unescaping
        /// </para>
        /// <para>
        /// Three forms are handled: a backslash followed by one of the permitted punctuation characters (kept in the output
        /// together with its backslash), a <c>\u</c>/<c>\U</c> escape followed by four/eight hex digits (decoded and appended to
        /// the output, the all-zero escape being dropped), and a <c>%</c> followed by two hex digits (validated and appended unchanged).
        /// Anything else results in the exception created by <c>Error()</c> being thrown.
        /// </para>
        /// </remarks>
        protected void HandleSparqlLocalNameEscapes()
        {
            //Grab the first character which must be a \ or %
            char next = this.SkipCharacter();

            //Buffer for the hex digits of Unicode/Percent escapes
            StringBuilder localOutput;

            if (next == '\\')
            {
                //Backslash based escape
                next = this.Peek();
                switch (next)
                {
                case '_':
                case '-':
                case '.':
                case '|':
                case '$':
                case '&':
                case '\'':
                case '(':
                case ')':
                case '*':
                case '+':
                case ',':
                case ';':
                case '=':
                case ':':
                case '/':
                case '?':
                case '#':
                case '@':
                case '%':
                    //Escapable Characters - the backslash is re-added to the output so the
                    //escape is passed through as-is for later unescaping
                    this._output.Append('\\');
                    this.ConsumeCharacter();
                    return;

                case 'u':
                    //Need to consume the u first
                    localOutput = new StringBuilder();
                    this.SkipCharacter();

                    next = this.Peek();

                    //Try to get Four Hex Digits
                    while (localOutput.Length < 4 && this.IsHexDigit(next))
                    {
                        localOutput.Append(next);
                        this.SkipCharacter();
                        next = this.Peek();
                    }

                    //Did we get four Hex Digits
                    if (localOutput.Length != 4)
                    {
                        throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
                    }
                    else if (localOutput.ToString().Equals("0000"))
                    {
                        //Ignore the null escape
                    }
                    else
                    {
                        this._output.Append(UnicodeSpecsHelper.ConvertToChar(localOutput.ToString()));
                    }

                    return;

                case 'U':
                    //Need to consume the U first
                    localOutput = new StringBuilder();
                    this.SkipCharacter();

                    next = this.Peek();

                    //Try to get Eight Hex Digits
                    while (localOutput.Length < 8 && this.IsHexDigit(next))
                    {
                        localOutput.Append(next);
                        this.SkipCharacter();
                        next = this.Peek();
                    }

                    //Did we get eight Hex Digits
                    if (localOutput.Length != 8)
                    {
                        throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
                    }
                    else if (localOutput.ToString().Equals("00000000"))
                    {
                        //Ignore the null escape
                    }
                    else
                    {
                        this._output.Append(UnicodeSpecsHelper.ConvertToChar(localOutput.ToString()));
                    }
                    return;

                default:
                    throw Error("Unexpected Backslash Character encountered in a Local Name, the Backslash Character can only be used for Unicode escapes (\\u and \\U) and a limited set of special characters (_-.|&'()*+,;=/?#@%) in Local Names");
                }
            }
            else if (next == '%')
            {
                //Percent encoded character - rebuild the full %XX sequence locally so it can be validated
                localOutput = new StringBuilder();
                localOutput.Append(next);

                next = this.Peek();
                while (localOutput.Length < 3 && this.IsHexDigit(next))
                {
                    localOutput.Append(next);
                    this.SkipCharacter();
                    next = this.Peek();
                }

                //Did we get % followed by two hex digits
                if (localOutput.Length != 3)
                {
                    throw Error("Encountered a % character in a Local Name but the required two hex digits were not present after it, please use \\% if you wish to represent the percent character");
                }
                //Reject the sequence when it is NOT a valid hex encoding; both build flavours must
                //negate the check (the Silverlight branch previously lacked the negation)
#if !SILVERLIGHT
                else if (!Uri.IsHexEncoding(localOutput.ToString(), 0))
#else
                else if (!SilverlightExtensions.IsHexEncoding(localOutput.ToString(), 0))
#endif
                {
                    throw Error("Invalid % encoded character encountered");
                }
                else
                {
                    //Valid - pass the percent encoding through unchanged
                    this._output.Append(localOutput.ToString());
                }
            }
            else
            {
                throw Error("HandleSparqlLocalNameEscapes() was called but the next character is not a % or \\ as expected");
            }
        }
Beispiel #7
0
        /// <summary>
        /// Processes the backslash escape sequence found at the current position of the input
        /// </summary>
        /// <param name="mode">Escape mode, this determines which escape sequences are accepted</param>
        /// <remarks>
        /// Recognised escapes are translated into the output; in every mode other than <c>QName</c> an unrecognised
        /// sequence simply has its backslash appended to the output so the caller can carry on, whereas in <c>QName</c>
        /// mode only the <c>\u</c> and <c>\U</c> escapes are permitted and anything else is an error
        /// </remarks>
        protected void HandleEscapes(TokeniserEscapeMode mode)
        {
            //The escape has to be introduced by a backslash
            char c = SkipCharacter();
            if (c != '\\')
            {
                throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
            }

            bool inQName = (mode == TokeniserEscapeMode.QName);

            //Working storage for the hex digits of \u and \U escapes
            StringBuilder hexDigits;
            String hex;

            c = Peek();
            switch (c)
            {
            case '\\':
                //Escaped Backslash, not permitted in QNames
                if (inQName)
                {
                    goto default;
                }

                //Take the Backslash itself
                ConsumeCharacter();

                //If the character after the \\ could be mistaken for a valid escape in the
                //tokenised output then an additional backslash is inserted into the output
                c = Peek();
                if (c == 't' || c == 'n' || c == 'r' || c == 'u' || c == 'U')
                {
                    _output.Append("\\");
                }
                return;

            case '"':
                //Escaped Double Quote, only permitted in the two Quoted Literal modes
                if (mode != TokeniserEscapeMode.QuotedLiterals && mode != TokeniserEscapeMode.QuotedLiteralsAlternate)
                {
                    goto default;
                }
                ConsumeCharacter();
                return;

            case '\'':
                //Escaped Single Quote, only permitted in Alternate Quoted Literals
                if (mode != TokeniserEscapeMode.QuotedLiteralsAlternate)
                {
                    goto default;
                }
                ConsumeCharacter();
                return;

            case '>':
                //Escaped end of URI, only permitted in URIs
                if (mode != TokeniserEscapeMode.Uri)
                {
                    goto default;
                }
                ConsumeCharacter();
                return;

            case 'n':
                //Line Feed, the escape is dropped and a real one written to the output
                if (inQName)
                {
                    goto default;
                }
                SkipCharacter();
                _output.Append("\n");
                return;

            case 'r':
                //Carriage Return, the escape is dropped and a real one written to the output
                if (inQName)
                {
                    goto default;
                }
                SkipCharacter();
                _output.Append("\r");
                return;

            case 't':
                //Tab, the escape is dropped and a real one written to the output
                if (inQName)
                {
                    goto default;
                }
                SkipCharacter();
                _output.Append("\t");
                return;

            case 'u':
                //Drop the u and then gather up to four hex digits
                hexDigits = new StringBuilder();
                SkipCharacter();

                for (c = Peek(); hexDigits.Length < 4 && IsHexDigit(c); c = Peek())
                {
                    hexDigits.Append(c);
                    SkipCharacter();
                }

                if (hexDigits.Length != 4)
                {
                    throw Error("Unexpected Character (Code " + (int)c + "): " + c + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
                }

                //The null escape produces no output, everything else is decoded
                hex = hexDigits.ToString();
                if (!hex.Equals("0000"))
                {
                    _output.Append(UnicodeSpecsHelper.ConvertToChar(hex));
                }
                return;

            case 'U':
                //Drop the U and then gather up to eight hex digits
                hexDigits = new StringBuilder();
                SkipCharacter();

                for (c = Peek(); hexDigits.Length < 8 && IsHexDigit(c); c = Peek())
                {
                    hexDigits.Append(c);
                    SkipCharacter();
                }

                if (hexDigits.Length != 8)
                {
                    throw Error("Unexpected Character (Code " + (int)c + "): " + c + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
                }

                //The null escape produces no output, everything else is decoded
                hex = hexDigits.ToString();
                if (!hex.Equals("00000000"))
                {
                    _output.Append(UnicodeSpecsHelper.ConvertToChar(hex));
                }
                return;

            default:
                //Not a recognised escape for this mode
                if (inQName)
                {
                    throw Error("Unexpected Backslash Character encountered in a QName, the Backslash Character can only be used for Unicode escapes (\\u and \\U) in QNames");
                }

                //Outside QNames the backslash is kept and the caller continues processing normally
                _output.Append("\\");
                return;
            }
        }
        /// <summary>
        /// Processes the backslash escape sequence found at the current position of the input
        /// </summary>
        /// <param name="mode">Escape mode, this determines which escape sequences are accepted</param>
        /// <remarks>
        /// A recognised escape that is permitted in the given mode is translated into the output; any other
        /// sequence causes the exception created by <c>Error()</c> to be thrown
        /// </remarks>
        protected void HandleEscapes(TokeniserEscapeMode mode)
        {
            // The escape has to be introduced by a backslash
            char current = SkipCharacter();
            if (current != '\\')
            {
                throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
            }

            // Pre-compute the mode groupings the individual escapes are tested against
            bool inLiteral = mode == TokeniserEscapeMode.QuotedLiterals
                             || mode == TokeniserEscapeMode.QuotedLiteralsAlternate
                             || mode == TokeniserEscapeMode.QuotedLiteralsBoth;
            bool inLiteralOrPermissiveUri = inLiteral || mode == TokeniserEscapeMode.PermissiveUri;

            // Each case either handles the escape and returns, or breaks out of the switch
            // to reach the single invalid escape error at the end of the method
            current = Peek();
            switch (current)
            {
            case '\\':
                // Escaped backslash
                if (inLiteralOrPermissiveUri)
                {
                    ConsumeCharacter();
                    return;
                }
                break;

            case '"':
                // Escaped double quote
                if (mode == TokeniserEscapeMode.QuotedLiterals || mode == TokeniserEscapeMode.QuotedLiteralsBoth)
                {
                    ConsumeCharacter();
                    return;
                }
                break;

            case '\'':
                // Escaped single quote
                if (mode == TokeniserEscapeMode.QuotedLiteralsAlternate || mode == TokeniserEscapeMode.QuotedLiteralsBoth)
                {
                    ConsumeCharacter();
                    return;
                }
                break;

            case '>':
                // Escaped end of URI
                if (mode == TokeniserEscapeMode.Uri)
                {
                    ConsumeCharacter();
                    return;
                }
                break;

            case 'n':
                // Line feed: drop the escape character and write the real character
                if (inLiteralOrPermissiveUri)
                {
                    SkipCharacter();
                    _output.Append('\n');
                    return;
                }
                break;

            case 'r':
                // Carriage return: drop the escape character and write the real character
                if (inLiteralOrPermissiveUri)
                {
                    SkipCharacter();
                    _output.Append('\r');
                    return;
                }
                break;

            case 't':
                // Tab: drop the escape character and write the real character
                if (inLiteralOrPermissiveUri)
                {
                    SkipCharacter();
                    _output.Append('\t');
                    return;
                }
                break;

            case 'b':
                // Backspace: literals only
                if (inLiteral)
                {
                    SkipCharacter();
                    _output.Append('\b');
                    return;
                }
                break;

            case 'f':
                // Form feed: literals only
                if (inLiteral)
                {
                    SkipCharacter();
                    _output.Append('\f');
                    return;
                }
                break;

            case 'u':
                {
                    // Drop the u and then gather up to four hex digits
                    StringBuilder hex = new StringBuilder();
                    SkipCharacter();

                    for (current = Peek(); hex.Length < 4 && IsHexDigit(current); current = Peek())
                    {
                        hex.Append(current);
                        SkipCharacter();
                    }

                    if (hex.Length != 4)
                    {
                        throw Error("Unexpected Character (Code " + (int)current + "): " + current + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
                    }

                    _output.Append(UnicodeSpecsHelper.ConvertToChar(hex.ToString()));
                    return;
                }

            case 'U':
                {
                    // Drop the U and then gather up to eight hex digits
                    StringBuilder hex = new StringBuilder();
                    SkipCharacter();

                    for (current = Peek(); hex.Length < 8 && IsHexDigit(current); current = Peek())
                    {
                        hex.Append(current);
                        SkipCharacter();
                    }

                    if (hex.Length != 8)
                    {
                        throw Error("Unexpected Character (Code " + (int)current + "): " + current + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
                    }

                    _output.Append(UnicodeSpecsHelper.ConvertToChars(hex.ToString()));
                    return;
                }

            default:
                break;
            }

            // Either not an escape character at all, or one that the current mode does not permit
            throw Error("Invalid escape sequence encountered, \\" + current + " is not a valid escape sequence in the current token");
        }