/// <summary>
/// Processes one backslash escape sequence at the current read position, accepting only those escapes which the supplied mode permits
/// </summary>
/// <param name="mode">Escape mode which decides which escape sequences are accepted</param>
/// <remarks>
/// The \u and \U escapes are handled regardless of <paramref name="mode"/>; every other escape is checked against the mode
/// and an error is thrown for any sequence which is not accepted.
/// </remarks>
protected void HandleEscapes(TokeniserEscapeMode mode)
{
    // The method must be entered with the backslash that opens the escape as the next character
    if (SkipCharacter() != '\\')
    {
        throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
    }

    // Work out up front which families of escapes this mode allows
    bool inLiteral = mode == TokeniserEscapeMode.QuotedLiterals
                     || mode == TokeniserEscapeMode.QuotedLiteralsAlternate
                     || mode == TokeniserEscapeMode.QuotedLiteralsBoth;
    bool inLiteralOrPermissiveUri = inLiteral || mode == TokeniserEscapeMode.PermissiveUri;

    // Collects the hex digits of a \u or \U escape
    StringBuilder hexDigits;

    char escaped = Peek();
    switch (escaped)
    {
        case '\\':
            // Escaped backslash
            if (!inLiteralOrPermissiveUri)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '"':
            // Escaped double quote, only for the double quoted literal modes
            if (mode != TokeniserEscapeMode.QuotedLiterals && mode != TokeniserEscapeMode.QuotedLiteralsBoth)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '\'':
            // Escaped single quote, only for the alternate quoted literal modes
            if (mode != TokeniserEscapeMode.QuotedLiteralsAlternate && mode != TokeniserEscapeMode.QuotedLiteralsBoth)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case '>':
            // Escaped end of URI marker, only for the Uri mode
            if (mode != TokeniserEscapeMode.Uri)
            {
                goto default;
            }
            ConsumeCharacter();
            return;

        case 'n':
        case 'r':
        case 't':
            // New line, carriage return and tab escapes: drop the letter and emit the real character
            if (!inLiteralOrPermissiveUri)
            {
                goto default;
            }
            SkipCharacter();
            _output.Append(escaped == 'n' ? '\n' : (escaped == 'r' ? '\r' : '\t'));
            return;

        case 'b':
        case 'f':
            // Backspace and form feed escapes are restricted to literals
            if (!inLiteral)
            {
                goto default;
            }
            SkipCharacter();
            _output.Append(escaped == 'b' ? '\b' : '\f');
            return;

        case 'u':
            // Drop the u and then gather at most four hex digits
            SkipCharacter();
            hexDigits = new StringBuilder();
            for (escaped = Peek(); hexDigits.Length < 4 && IsHexDigit(escaped); escaped = Peek())
            {
                hexDigits.Append(escaped);
                SkipCharacter();
            }

            // Anything other than exactly four digits is malformed
            if (hexDigits.Length != 4)
            {
                throw Error("Unexpected Character (Code " + (int)escaped + "): " + escaped + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
            }
            _output.Append(UnicodeSpecsHelper.ConvertToChar(hexDigits.ToString()));
            return;

        case 'U':
            // Drop the U and then gather at most eight hex digits
            SkipCharacter();
            hexDigits = new StringBuilder();
            for (escaped = Peek(); hexDigits.Length < 8 && IsHexDigit(escaped); escaped = Peek())
            {
                hexDigits.Append(escaped);
                SkipCharacter();
            }

            // Anything other than exactly eight digits is malformed
            if (hexDigits.Length != 8)
            {
                throw Error("Unexpected Character (Code " + (int)escaped + "): " + escaped + " encountered while trying to parse Unicode Escape from Content:\n" + _output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
            }
            _output.Append(UnicodeSpecsHelper.ConvertToChars(hexDigits.ToString()));
            return;

        default:
            // Either an unknown escape or one which the current mode does not allow
            throw Error("Invalid escape sequence encountered, \\" + escaped + " is not a valid escape sequence in the current token");
    }
}
/// <summary>
/// Handles the standard escapes supported in all the UTF-8 based RDF serializations
/// </summary>
/// <param name="mode">Escape mode which decides which escape sequences are accepted</param>
/// <remarks>
/// <para>
/// The next character must be the backslash that starts the escape, otherwise an error is thrown.
/// </para>
/// <para>
/// In every mode other than <c>QName</c> an unrecognised escape is not an error: a backslash is appended to the
/// output and the method returns so that the caller carries on with the following character. In <c>QName</c> mode
/// only the \u and \U escapes are accepted and anything else causes an error to be thrown.
/// </para>
/// <para>
/// The null escapes \u0000 and \U00000000 are read but contribute nothing to the output.
/// </para>
/// </remarks>
protected void HandleEscapes(TokeniserEscapeMode mode)
{
    //Grab the first character which must be a \
    char next = this.SkipCharacter();
    if (next != '\\')
    {
        throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
    }

    //Stuff for Unicode escapes
    StringBuilder localOutput;
    string hex;

    next = this.Peek();
    switch (next)
    {
        case '\\':
            //Backslash escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Consume this one Backslash
                this.ConsumeCharacter();

                //If this was a backslash escape i.e. \\
                //Then need to check whether the subsequent character could be confused with a valid escape
                //in the tokenised output and if so insert another backslash into the output
                next = this.Peek();
                switch (next)
                {
                    case 't':
                    case 'n':
                    case 'r':
                    case 'u':
                    case 'U':
                        this._output.Append("\\");
                        break;
                }

                return;
            }
            else
            {
                goto default;
            }

        case '"':
            //Quote escape (accepted in both Quoted Literal modes)
            if (mode == TokeniserEscapeMode.QuotedLiterals || mode == TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        case '\'':
            //Single Quote Escape (only valid in Alternate Quoted Literals)
            if (mode == TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        case '>':
            //End Uri Escape (only valid in URIs)
            if (mode == TokeniserEscapeMode.Uri)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        #region White Space Escapes

        case 'n':
            //New Line Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real New Line to the output
                this.SkipCharacter();
                this._output.Append("\n");
                return;
            }
            else
            {
                goto default;
            }

        case 'r':
            //Carriage Return Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real Carriage Return to the output
                this.SkipCharacter();
                this._output.Append("\r");
                return;
            }
            else
            {
                goto default;
            }

        case 't':
            //Tab Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real Tab to the output
                this.SkipCharacter();
                this._output.Append("\t");
                return;
            }
            else
            {
                goto default;
            }

        #endregion

        #region Unicode Escapes

        case 'u':
            //Need to consume the u first
            localOutput = new StringBuilder();
            this.SkipCharacter();

            next = this.Peek();

            //Try to get Four Hex Digits
            while (localOutput.Length < 4 && this.IsHexDigit(next))
            {
                localOutput.Append(next);
                this.SkipCharacter();
                next = this.Peek();
            }

            //Did we get four Hex Digits
            hex = localOutput.ToString();
            if (hex.Length != 4)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
            }
            else if (hex.Equals("0000"))
            {
                //Ignore the null escape
            }
            else
            {
                this._output.Append(UnicodeSpecsHelper.ConvertToChar(hex));
            }

            return;

        case 'U':
            //Need to consume the U first
            localOutput = new StringBuilder();
            this.SkipCharacter();

            next = this.Peek();

            //Try to get Eight Hex Digits
            while (localOutput.Length < 8 && this.IsHexDigit(next))
            {
                localOutput.Append(next);
                this.SkipCharacter();
                next = this.Peek();
            }

            //Did we get eight Hex Digits
            hex = localOutput.ToString();
            if (hex.Length != 8)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
            }
            else if (hex.Equals("00000000"))
            {
                //Ignore the null escape
            }
            else
            {
                //An eight digit escape may name a code point above U+FFFF which needs more than one UTF-16 char,
                //so the multi-character conversion is used rather than the single char conversion used for \u
                //NOTE(review): assumes ConvertToChars yields the complete char sequence for the code point - confirm
                this._output.Append(UnicodeSpecsHelper.ConvertToChars(hex));
            }

            return;

        #endregion

        default:
            //Not an escape character
            if (mode != TokeniserEscapeMode.QName)
            {
                //Append the \ and then return
                //Processing continues normally in the caller function
                this._output.Append("\\");
                return;
            }
            else
            {
                throw Error("Unexpected Backslash Character encountered in a QName, the Backslash Character can only be used for Unicode escapes (\\u and \\U) in QNames");
            }
    }
}
/// <summary>
/// Handles the standard escapes supported in all the UTF-8 based RDF serializations
/// </summary>
/// <param name="mode">Escape mode which decides which escape sequences are accepted</param>
/// <remarks>
/// <para>
/// The next character must be the backslash that starts the escape, otherwise an error is thrown.
/// </para>
/// <para>
/// In every mode other than <c>QName</c> an unrecognised escape is not an error: a backslash is appended to the
/// output and the method returns so that the caller carries on with the following character. In <c>QName</c> mode
/// only the \u and \U escapes are accepted and anything else causes an error to be thrown.
/// </para>
/// <para>
/// The null escapes \u0000 and \U00000000 are read but contribute nothing to the output.
/// </para>
/// </remarks>
protected void HandleEscapes(TokeniserEscapeMode mode)
{
    //Grab the first character which must be a \
    //Without this check a call made at the wrong position would silently drop a character from the input
    char next = this.SkipCharacter();
    if (next != '\\')
    {
        throw Error("HandleEscapes() was called but the first character was not a \\ as expected");
    }

    //Stuff for Unicode escapes
    StringBuilder localOutput;
    string hex;

    next = this.Peek();
    switch (next)
    {
        case '\\':
            //Backslash escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Consume this one Backslash
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        case '"':
            //Quote escape (accepted in both Quoted Literal modes)
            if (mode == TokeniserEscapeMode.QuotedLiterals || mode == TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        case '\'':
            //Single Quote Escape (only valid in Alternate Quoted Literals)
            if (mode == TokeniserEscapeMode.QuotedLiteralsAlternate)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        case '>':
            //End Uri Escape (only valid in URIs)
            if (mode == TokeniserEscapeMode.Uri)
            {
                //Consume and return
                this.ConsumeCharacter();
                return;
            }
            else
            {
                goto default;
            }

        #region White Space Escapes

        case 'n':
            //New Line Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real New Line to the output
                this.SkipCharacter();
                this._output.Append("\n");
                return;
            }
            else
            {
                goto default;
            }

        case 'r':
            //Carriage Return Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real Carriage Return to the output
                this.SkipCharacter();
                this._output.Append("\r");
                return;
            }
            else
            {
                goto default;
            }

        case 't':
            //Tab Escape
            if (mode != TokeniserEscapeMode.QName)
            {
                //Discard and append a real Tab to the output
                this.SkipCharacter();
                this._output.Append("\t");
                return;
            }
            else
            {
                goto default;
            }

        #endregion

        #region Unicode Escapes

        case 'u':
            //Need to consume the u first
            localOutput = new StringBuilder();
            this.SkipCharacter();

            next = this.Peek();

            //Try to get Four Hex Digits
            while (localOutput.Length < 4 && this.IsHexDigit(next))
            {
                localOutput.Append(next);
                this.SkipCharacter();
                next = this.Peek();
            }

            //Did we get four Hex Digits
            hex = localOutput.ToString();
            if (hex.Length != 4)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\u Escape must be followed by four Hex Digits");
            }
            else if (hex.Equals("0000"))
            {
                //Ignore the null escape
            }
            else
            {
                this._output.Append(UnicodeSpecsHelper.ConvertToChar(hex));
            }

            return;

        case 'U':
            //Need to consume the U first
            localOutput = new StringBuilder();
            this.SkipCharacter();

            next = this.Peek();

            //Try to get Eight Hex Digits
            while (localOutput.Length < 8 && this.IsHexDigit(next))
            {
                localOutput.Append(next);
                this.SkipCharacter();
                next = this.Peek();
            }

            //Did we get eight Hex Digits
            //The collected digits are deliberately NOT written to the output themselves, only the
            //character(s) they denote are, otherwise the raw hex text would end up in the token value
            hex = localOutput.ToString();
            if (hex.Length != 8)
            {
                throw Error("Unexpected Character (Code " + (int)next + "): " + next + " encountered while trying to parse Unicode Escape from Content:\n" + this._output.ToString() + "\nThe \\U Escape must be followed by eight Hex Digits");
            }
            else if (hex.Equals("00000000"))
            {
                //Ignore the null escape
            }
            else
            {
                //An eight digit escape may name a code point above U+FFFF which needs more than one UTF-16 char,
                //so the multi-character conversion is used rather than the single char conversion used for \u
                //NOTE(review): assumes ConvertToChars yields the complete char sequence for the code point - confirm
                this._output.Append(UnicodeSpecsHelper.ConvertToChars(hex));
            }

            return;

        #endregion

        default:
            //Not an escape character
            if (mode != TokeniserEscapeMode.QName)
            {
                //Append the \ and then return
                //Processing continues normally in the caller function
                this._output.Append("\\");
                return;
            }
            else
            {
                throw Error("Unexpected Backslash Character encountered in a QName, the Backslash Character can only be used for Unicode escapes (\\u and \\U) in QNames");
            }
    }
}