/// <summary>
/// Reads a variable from the input: one leading marker character followed by any run of
/// letters, digits, hyphens and underscores.
/// </summary>
/// <returns>A <see cref="VariableToken"/> for the text that was consumed</returns>
/// <remarks>
/// The consumed text is checked with <c>IsValidVarName</c>; if that check fails the error
/// produced by <c>Error</c> is thrown.
/// </remarks>
private IToken TryGetVariable()
{
    // The leading character (expected to be a ?) is taken without inspection
    ConsumeCharacter();

    // Scan forward for as long as the lookahead can be part of a variable name
    while (true)
    {
        char lookahead = Peek();
        bool isNameCharacter = Char.IsLetterOrDigit(lookahead)
                               || UnicodeSpecsHelper.IsLetterOrDigit(lookahead)
                               || lookahead == '-'
                               || lookahead == '_';
        if (!isNameCharacter)
        {
            break;
        }

        ConsumeCharacter();
    }

    // Reject anything that is not a well formed variable name
    String name = Value;
    if (!IsValidVarName(name))
    {
        throw Error("The value '" + name + "' is not valid a Variable Name");
    }

    return new VariableToken(name, CurrentLine, StartPosition, EndPosition);
}
/// <summary>
/// Reads a data type specifier, which is given either as a URI (starting with <c>&lt;</c>)
/// or as a QName (starting with a letter or underscore).
/// </summary>
/// <returns>
/// A <see cref="DataTypeToken"/> carrying the value and position of the URI/QName token that
/// was read; URI values are wrapped in angle brackets.
/// </returns>
/// <remarks>
/// Sets <c>LastTokenType</c> to <c>Token.DATATYPE</c> on success. Throws the error produced by
/// <c>Error</c> when the next character cannot start a URI or QName, or when
/// <c>TryGetQName</c> yields something other than a QName token.
/// </remarks>
private IToken TryGetDataType()
{
    // Expect to either see the start of a Uri or a QName
    char next = Peek();

    if (next == '<')
    {
        // Uri specified Data Type
        IToken uri = TryGetUri();
        LastTokenType = Token.DATATYPE;
        return new DataTypeToken("<" + uri.Value + ">", uri.StartLine, uri.StartPosition, uri.EndPosition);
    }

    if (Char.IsLetter(next) || UnicodeSpecsHelper.IsLetter(next) || next == '_')
    {
        // QName specified Data Type
        IToken qname = TryGetQName();
        if (qname.TokenType != Token.QNAME)
        {
            throw Error("Unexpected Token '" + qname.GetType().ToString() + "' was produced when a QName for a Data Type was expected!");
        }

        LastTokenType = Token.DATATYPE;
        return new DataTypeToken(qname.Value, qname.StartLine, qname.StartPosition, qname.EndPosition);
    }

    // Invalid Start Character
    // The message previously never closed the "(Code N" parenthesis; it now follows the
    // "(Code N): c" layout used by the other tokeniser errors
    throw Error("Unexpected Character (Code " + (int)next + "): " + next + "\nExpected a < to start a URI or a valid start character for a QName to specify Data Type");
}
/// <summary>
/// Decodes the percent encoded escape (<c>%XX</c>) that starts at <paramref name="index"/>
/// in <paramref name="value"/>.
/// </summary>
/// <param name="value">String containing the escape</param>
/// <param name="index">
/// Position of the <c>%</c> character; on success it is advanced by three so that it points
/// just past the escape
/// </param>
/// <returns>
/// The character that <c>UnicodeSpecsHelper.ConvertToChar</c> produces for the two characters
/// following the <c>%</c>
/// </returns>
/// <exception cref="RdfParseException">
/// Thrown if fewer than three characters remain from <paramref name="index"/> or the character
/// at <paramref name="index"/> is not <c>%</c>
/// </exception>
public static char HexUnescape(String value, ref int index)
{
    // A complete escape needs the % plus two further characters
    if (index + 2 >= value.Length)
    {
        throw new RdfParseException("Malformed Percent Encoded Escape");
    }
    if (value[index] != '%')
    {
        throw new RdfParseException("Malformed Percent Encoded Escape");
    }

    // Read the two hex digits BEFORE moving the index; advancing first made the
    // Substring call read past the escape (or past the end of the string)
    char unescaped = UnicodeSpecsHelper.ConvertToChar(value.Substring(index + 1, 2));

    // Only advance once the escape has been decoded successfully
    index += 3;
    return unescaped;
}
/// <summary>
/// Reads a variable from the input: one leading marker character followed by any run of
/// letters, digits, hyphens, underscores and backslash escapes.
/// </summary>
/// <returns>A <see cref="VariableToken"/> for the text that was read</returns>
/// <remarks>
/// Backslashes are handed to <c>HandleEscapes</c> in QName mode. A trailing full stop is
/// given back via <c>Backtrack</c> and removed from the name before it is validated with
/// <c>SparqlSpecsHelper.IsValidVarName</c>.
/// </remarks>
private IToken TryGetVariable()
{
    // The leading character (expected to be ? or $) is taken without inspection
    ConsumeCharacter();

    // Scan forward for as long as the lookahead can be part of a variable name
    while (true)
    {
        char lookahead = Peek();
        bool continuesName = Char.IsLetterOrDigit(lookahead)
                             || UnicodeSpecsHelper.IsLetterOrDigit(lookahead)
                             || lookahead == '-'
                             || lookahead == '_'
                             || lookahead == '\\';
        if (!continuesName)
        {
            break;
        }

        if (lookahead == '\\')
        {
            // Let the escape handler check that this is a valid escape
            HandleEscapes(TokeniserEscapeMode.QName);
        }
        else
        {
            ConsumeCharacter();
        }
    }

    // A full stop at the end is not part of the variable name
    String name = Value;
    if (name.EndsWith("."))
    {
        Backtrack();
        name = name.Substring(0, name.Length - 1);
    }

    if (!SparqlSpecsHelper.IsValidVarName(name))
    {
        throw Error("The value '" + name + "' is not valid a Variable Name");
    }

    return new VariableToken(name, CurrentLine, StartPosition, EndPosition);
}
/// <summary>
/// Determines whether a string is acceptable as a variable name.
/// </summary>
/// <param name="value">Candidate name</param>
/// <returns>
/// True if <c>_isValidVarName</c> matches the value, or if the value starts with an underscore
/// or letter and continues with only underscores, hyphens, letters and digits; false otherwise
/// (including for an empty string that the pattern does not match)
/// </returns>
private bool IsValidVarName(String value)
{
    // Fast path: the pattern accepts the name outright
    if (_isValidVarName.IsMatch(value))
    {
        return true;
    }

    // An empty name has no start character to inspect; previously this fell through to an
    // out-of-range index on the first character
    if (value.Length == 0)
    {
        return false;
    }

    // Otherwise validate character by character
    // First character must be an underscore or letter
    char first = value[0];
    if (first != '_' && !Char.IsLetter(first) && !UnicodeSpecsHelper.IsLetter(first))
    {
        return false;
    }

    // Remaining characters must be underscores, hyphens, letters or digits
    for (int i = 1; i < value.Length; i++)
    {
        char c = value[i];
        if (c != '_' && c != '-' && !Char.IsLetterOrDigit(c) && !UnicodeSpecsHelper.IsLetterOrDigit(c))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Handles the special SPARQL escapes that can occur in a local name
/// </summary>
/// <remarks>
/// <para>
/// Unlike <see cref="BaseTokeniser.HandleEscapes()">HandleEscapes()</see> this only unescapes unicode escapes, other escapes are simply validated and passed through for later unescaping
/// </para>
/// <para>
/// Must be called when the next character is a <c>\</c> or a <c>%</c>. Backslash escapes of the
/// permitted special characters are written to the output with their backslash retained,
/// <c>\u</c>/<c>\U</c> escapes are converted (the all-zero escape is dropped), and a
/// <c>%</c> must be followed by two hex digits, in which case the three characters are written
/// to the output unchanged. Anything else raises the error produced by <c>Error</c>.
/// </para>
/// </remarks>
protected void HandleSparqlLocalNameEscapes()
{
    //Grab the first character which must be a \ or %
    char next = this.SkipCharacter();

    //Buffer for the digits of a Unicode/Hex escape
    StringBuilder localOutput;

    if (next == '\\')
    {
        //Backslash based escape
        next = this.Peek();
        switch (next)
        {
            case '_':
            case '-':
            case '.':
            case '|':
            case '$':
            case '&':
            case '\'':
            case '(':
            case ')':
            case '*':
            case '+':
            case ',':
            case ';':
            case '=':
            case ':':
            case '/':
            case '?':
            case '#':
            case '@':
            case '%':
                //Escapable Characters
                //The backslash is kept in the output so the escape can be unescaped later
                this._output.Append('\\');
                this.ConsumeCharacter();
                return;

            case 'u':
                //Need to consume the u first
                localOutput = new StringBuilder();
                this.SkipCharacter();
                next = this.Peek();

                //Try to get Four Hex Digits
                while (localOutput.Length < 4 && this.IsHexDigit(next))
                {
                    localOutput.Append(next);
                    this.SkipCharacter();
                    next = this.Peek();
                }

                //Did we get four Hex Digits
                if (localOutput.Length != 4)
                {
                    throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
                }

                //The null escape is ignored, everything else is converted
                if (!localOutput.ToString().Equals("0000"))
                {
                    this._output.Append(UnicodeSpecsHelper.ConvertToChar(localOutput.ToString()));
                }
                return;

            case 'U':
                //Need to consume the U first
                localOutput = new StringBuilder();
                this.SkipCharacter();
                next = this.Peek();

                //Try to get Eight Hex Digits
                while (localOutput.Length < 8 && this.IsHexDigit(next))
                {
                    localOutput.Append(next);
                    this.SkipCharacter();
                    next = this.Peek();
                }

                //Did we get eight Hex Digits
                if (localOutput.Length != 8)
                {
                    throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
                }

                //The null escape is ignored, everything else is converted
                if (!localOutput.ToString().Equals("00000000"))
                {
                    this._output.Append(UnicodeSpecsHelper.ConvertToChar(localOutput.ToString()));
                }
                return;

            default:
                throw Error("Unexpected Backslash Character encountered in a Local Name, the Backslash Character can only be used for Unicode escapes (\\u and \\U) and a limited set of special characters (_-.|&'()*+,;=/?#@%) in Local Names");
        }
    }
    else if (next == '%')
    {
        //Percent encoded character, the % itself has already been taken from the input
        localOutput = new StringBuilder();
        localOutput.Append(next);
        next = this.Peek();

        //Try to get two Hex Digits
        while (localOutput.Length < 3 && this.IsHexDigit(next))
        {
            localOutput.Append(next);
            this.SkipCharacter();
            next = this.Peek();
        }

        //Did we get % followed by two hex digits
        if (localOutput.Length != 3)
        {
            throw Error("Encountered a % character in a Local Name but the required two hex digits were not present after it, please use \\% if you wish to represent the percent character");
        }

        //Evaluate the platform specific check once and negate it in a single place; the
        //Silverlight branch used to test the un-negated result, which inverted the validation
        //NOTE(review): assumes SilverlightExtensions.IsHexEncoding mirrors Uri.IsHexEncoding - confirm
        String encoded = localOutput.ToString();
#if !SILVERLIGHT
        bool isHexEncoded = Uri.IsHexEncoding(encoded, 0);
#else
        bool isHexEncoded = SilverlightExtensions.IsHexEncoding(encoded, 0);
#endif
        if (!isHexEncoded)
        {
            throw Error("Invalid % encoded character encountered");
        }

        //Valid percent encodings are passed through as-is
        this._output.Append(encoded);
    }
    else
    {
        throw Error("HandleSparqlLocalNameEscapes() was called but the next character is not a % or \\ as expected");
    }
}
/// <summary>
/// Handles the standard escapes supported in all the UTF-8 based RDF serializations
/// </summary>
/// <param name="mode">Escape mode, which decides which escapes are permitted</param>
/// <remarks>
/// Must be called when the next character is the backslash that opens the escape. In QName
/// mode only the <c>\u</c> and <c>\U</c> escapes are accepted; in every other mode an
/// unrecognised escape just has its backslash written to the output and processing carries on
/// in the caller.
/// </remarks>
protected void HandleEscapes(TokeniserEscapeMode mode)
{
    //The opening \ is discarded from the input
    if (this.SkipCharacter() != '\\')
    {
        throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
    }

    bool inQName = (mode == TokeniserEscapeMode.QName);

    //Collects the hex digits of a unicode escape
    StringBuilder hexDigits;

    char next = this.Peek();
    switch (next)
    {
        case '\\':
            //Backslash escape (not permitted in QNames)
            if (inQName)
            {
                goto default;
            }

            //Consume this one Backslash
            this.ConsumeCharacter();

            //If the character after the \\ could be mistaken for a valid escape in the
            //tokenised output then another backslash is inserted into the output
            next = this.Peek();
            if (next == 't' || next == 'n' || next == 'r' || next == 'u' || next == 'U')
            {
                this._output.Append("\\");
            }
            return;

        case '"':
            //Quote escape (only valid in Quoted Literals)
            if (mode != TokeniserEscapeMode.QuotedLiterals && mode != TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                goto default;
            }
            this.ConsumeCharacter();
            return;

        case '\'':
            //Single Quote escape (only valid in Alternate Quoted Literals)
            if (mode != TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                goto default;
            }
            this.ConsumeCharacter();
            return;

        case '>':
            //End Uri escape (only valid in URIs)
            if (mode != TokeniserEscapeMode.Uri)
            {
                goto default;
            }
            this.ConsumeCharacter();
            return;

        case 'n':
        case 'r':
        case 't':
            //White space escapes (not permitted in QNames)
            if (inQName)
            {
                goto default;
            }

            //Discard the letter and append the real new line/carriage return/tab instead
            this.SkipCharacter();
            this._output.Append(next == 'n' ? "\n" : (next == 'r' ? "\r" : "\t"));
            return;

        case 'u':
            //Discard the u and then gather up to four hex digits
            hexDigits = new StringBuilder();
            this.SkipCharacter();
            for (next = this.Peek(); hexDigits.Length < 4 && this.IsHexDigit(next); next = this.Peek())
            {
                hexDigits.Append(next);
                this.SkipCharacter();
            }

            if (hexDigits.Length != 4)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
            }

            //The null escape is ignored, everything else is converted
            if (!hexDigits.ToString().Equals("0000"))
            {
                this._output.Append(UnicodeSpecsHelper.ConvertToChar(hexDigits.ToString()));
            }
            return;

        case 'U':
            //Discard the U and then gather up to eight hex digits
            hexDigits = new StringBuilder();
            this.SkipCharacter();
            for (next = this.Peek(); hexDigits.Length < 8 && this.IsHexDigit(next); next = this.Peek())
            {
                hexDigits.Append(next);
                this.SkipCharacter();
            }

            if (hexDigits.Length != 8)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
            }

            //The null escape is ignored, everything else is converted
            if (!hexDigits.ToString().Equals("00000000"))
            {
                this._output.Append(UnicodeSpecsHelper.ConvertToChar(hexDigits.ToString()));
            }
            return;

        default:
            //Not an escape character
            if (inQName)
            {
                throw Error("Unexpected Backslash Character encountered in a QName, the Backslash Character can only be used for Unicode escapes (\\u and \\U) in QNames");
            }

            //Append the \ and return, processing continues normally in the caller
            this._output.Append("\\");
            return;
    }
}
/// <summary>
/// Handles the standard escapes supported in all the UTF-8 based RDF serializations
/// </summary>
/// <param name="mode">Escape mode, which decides which escapes are permitted</param>
/// <remarks>
/// Must be called when the next character is the backslash that opens the escape. Any escape
/// that is not permitted in the given mode raises the error produced by <c>Error</c>.
/// </remarks>
protected void HandleEscapes(TokeniserEscapeMode mode)
{
    // The opening \ is discarded from the input
    if (SkipCharacter() != '\\')
    {
        throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
    }

    // Work out up front which families of escape the mode permits
    bool isLiteral = mode == TokeniserEscapeMode.QuotedLiterals
                     || mode == TokeniserEscapeMode.QuotedLiteralsAlternate
                     || mode == TokeniserEscapeMode.QuotedLiteralsBoth;
    bool isLiteralOrPermissiveUri = isLiteral || mode == TokeniserEscapeMode.PermissiveUri;

    // Collects the hex digits of a unicode escape
    StringBuilder hexDigits;

    char next = Peek();
    switch (next)
    {
        case '\\':
            // Backslash escape
            if (!isLiteralOrPermissiveUri)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '"':
            // Quote escape (only valid in Quoted Literals)
            if (mode != TokeniserEscapeMode.QuotedLiterals && mode != TokeniserEscapeMode.QuotedLiteralsBoth)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '\'':
            // Single Quote escape (only valid in Alternate Quoted Literals)
            if (mode != TokeniserEscapeMode.QuotedLiteralsAlternate && mode != TokeniserEscapeMode.QuotedLiteralsBoth)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '>':
            // End Uri escape (only valid in URIs)
            if (mode != TokeniserEscapeMode.Uri)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case 'n':
        case 'r':
        case 't':
            // New line, carriage return and tab escapes
            if (!isLiteralOrPermissiveUri)
            {
                goto default;
            }

            // Discard the letter and append the real character instead
            SkipCharacter();
            _output.Append(next == 'n' ? '\n' : (next == 'r' ? '\r' : '\t'));
            return;

        case 'b':
        case 'f':
            // Backspace and form feed escapes (literals only)
            if (!isLiteral)
            {
                goto default;
            }

            // Discard the letter and append the real character instead
            SkipCharacter();
            _output.Append(next == 'b' ? '\b' : '\f');
            return;

        case 'u':
            // Discard the u and then gather up to four hex digits
            hexDigits = new StringBuilder();
            SkipCharacter();
            for (next = Peek(); hexDigits.Length < 4 && IsHexDigit(next); next = Peek())
            {
                hexDigits.Append(next);
                SkipCharacter();
            }

            if (hexDigits.Length != 4)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
            }

            _output.Append(UnicodeSpecsHelper.ConvertToChar(hexDigits.ToString()));
            return;

        case 'U':
            // Discard the U and then gather up to eight hex digits
            hexDigits = new StringBuilder();
            SkipCharacter();
            for (next = Peek(); hexDigits.Length < 8 && IsHexDigit(next); next = Peek())
            {
                hexDigits.Append(next);
                SkipCharacter();
            }

            if (hexDigits.Length != 8)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
            }

            _output.Append(UnicodeSpecsHelper.ConvertToChars(hexDigits.ToString()));
            return;

        default:
            // Not an escape that is valid in this mode
            throw Error("Invalid escape sequence encountered, \\" + next + " is not a valid escape sequence in the current token");
    }
}