/// <summary>
/// Attempts to read a string delimited by balanced parentheses from <paramref name="inputBytes"/>.
/// </summary>
/// <param name="currentByte">The byte the caller has already read; tokenization only proceeds when it is '('.</param>
/// <param name="inputBytes">The source from which the remaining bytes of the string are read.</param>
/// <param name="token">Set to a <see cref="StringToken"/> on success and to <c>null</c> on failure.</param>
/// <returns>
/// <c>false</c> when <paramref name="inputBytes"/> is <c>null</c> or <paramref name="currentByte"/> is not '(';
/// otherwise <c>true</c>, including when the input runs out before the closing bracket is seen.
/// </returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (inputBytes == null || currentByte != '(')
    {
        return false;
    }

    // The shared builder is emptied again just before returning.
    var text = stringBuilder;

    var depth = 1;                  // unescaped '(' seen minus unescaped ')' seen
    var escapePending = false;      // the previous character was an unconsumed backslash
    var skippingLineBreak = false;  // set by ProcessEscapedCharacter; end-of-line characters are dropped while true
    var readingOctal = false;       // currently collecting the digits of an octal escape
    var octalDigitCount = 0;
    short[] octalDigits = { 0, 0, 0 };

    // Stop as soon as the outermost bracket has been closed, without reading any further byte.
    while (depth > 0 && inputBytes.MoveNext())
    {
        var current = (char)inputBytes.CurrentByte;

        if (readingOctal)
        {
            var isOctalDigit = current >= '0' && current <= '7';

            if (isOctalDigit)
            {
                // Shift the digits collected so far and store the new one.
                LeftShiftOctal(current, octalDigitCount, octalDigits);
                octalDigitCount++;
            }

            if (!isOctalDigit || octalDigitCount == 3)
            {
                // The escape is complete: either three digits were read or a non-octal character ended it.
                var code = OctalHelpers.FromOctalDigits(octalDigits);

                // TODO: possibly wrong for large octal values; the intended behaviour for them is unclear.
                text.Append((char)code);

                octalDigits[0] = 0;
                octalDigits[1] = 0;
                octalDigits[2] = 0;
                octalDigitCount = 0;
                readingOctal = false;
            }

            if (isOctalDigit)
            {
                // The digit belonged to the escape; a terminating non-digit falls through to normal handling.
                continue;
            }
        }

        if (current == ')')
        {
            skippingLineBreak = false;

            if (escapePending)
            {
                // An escaped bracket does not change the nesting depth.
                escapePending = false;
            }
            else
            {
                depth--;
            }

            if (depth > 0)
            {
                text.Append(current);
            }

            // TODO: check for other ends of string where the string is improperly formatted.
            depth = CheckForEndOfString(depth, inputBytes);
        }
        else if (current == '(')
        {
            skippingLineBreak = false;

            if (escapePending)
            {
                // An escaped bracket does not change the nesting depth.
                escapePending = false;
            }
            else
            {
                depth++;
            }

            text.Append(current);
        }
        else if (current == '\\')
        {
            skippingLineBreak = false;

            if (escapePending)
            {
                // Second backslash of an escaped pair: emit one literal backslash.
                text.Append(current);
                escapePending = false;
            }
            else
            {
                escapePending = true;
            }
        }
        else if (skippingLineBreak)
        {
            if (ReadHelper.IsEndOfLine(current))
            {
                continue;
            }

            skippingLineBreak = false;
            text.Append(current);
        }
        else if (escapePending)
        {
            ProcessEscapedCharacter(current, text, octalDigits, ref readingOctal, ref octalDigitCount, ref skippingLineBreak);
            escapePending = false;
        }
        else
        {
            text.Append(current);
        }
    }

    string value;
    StringToken.Encoding encoding;

    // A byte-order mark needs at least two characters.
    var longEnoughForMark = text.Length >= 2;

    if (longEnoughForMark && text[0] == 0xFE && text[1] == 0xFF)
    {
        // FE FF prefix: decode as big-endian UTF-16 and drop the leading mark character.
        var raw = OtherEncodings.StringAsLatin1Bytes(text.ToString());
        value = Encoding.BigEndianUnicode.GetString(raw).Substring(1);
        encoding = StringToken.Encoding.Utf16BE;
    }
    else if (longEnoughForMark && text[0] == 0xFF && text[1] == 0xFE)
    {
        // FF FE prefix: decode as little-endian UTF-16 and drop the leading mark character.
        var raw = OtherEncodings.StringAsLatin1Bytes(text.ToString());
        value = Encoding.Unicode.GetString(raw).Substring(1);
        encoding = StringToken.Encoding.Utf16;
    }
    else
    {
        value = text.ToString();
        encoding = StringToken.Encoding.Iso88591;
    }

    text.Clear();

    token = new StringToken(value, encoding);

    return true;
}
/// <summary>
/// Attempts to read a string delimited by balanced parentheses from <paramref name="inputBytes"/>.
/// </summary>
/// <param name="currentByte">The byte the caller has already read; tokenization only proceeds when it is '('.</param>
/// <param name="inputBytes">The source from which the remaining bytes of the string are read.</param>
/// <param name="token">Set to a <see cref="StringToken"/> on success and to <c>null</c> on failure.</param>
/// <returns>
/// <c>false</c> when <paramref name="inputBytes"/> is <c>null</c> or <paramref name="currentByte"/> is not '(';
/// otherwise <c>true</c>, including when the input runs out before the closing bracket is seen.
/// </returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (inputBytes == null)
    {
        return false;
    }

    if (currentByte != '(')
    {
        return false;
    }

    // Allocated only once the guards have passed, so a rejected call allocates nothing.
    var builder = new StringBuilder();

    int numberOfBrackets = 1;       // unescaped '(' seen minus unescaped ')' seen
    bool isEscapeActive = false;    // the previous character was an unconsumed backslash
    bool isLineBreaking = false;    // set by ProcessEscapedCharacter; end-of-line characters are dropped while true
    bool octalModeActive = false;   // currently collecting the digits of an octal escape
    short[] octal = { 0, 0, 0 };
    int octalsRead = 0;

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;
        var c = (char)b;

        if (octalModeActive)
        {
            var nextCharacterOctal = c >= '0' && c <= '7';

            if (nextCharacterOctal)
            {
                // Left shift the octals.
                LeftShiftOctal(c, octalsRead, octal);
                octalsRead++;
            }

            if (octalsRead == 3 || !nextCharacterOctal)
            {
                // The escape is complete: either three digits were read or a non-octal character ended it.
                var characterCode = OctalHelpers.FromOctalDigits(octal);

                // For now :(
                // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                builder.Append((char)characterCode);

                octal[0] = 0;
                octal[1] = 0;
                octal[2] = 0;
                octalsRead = 0;
                octalModeActive = false;
            }

            if (nextCharacterOctal)
            {
                // The digit belonged to the escape; a terminating non-digit falls through to the switch.
                continue;
            }
        }

        switch (c)
        {
            case ')':
                isLineBreaking = false;

                // An escaped bracket does not change the nesting depth.
                if (!isEscapeActive)
                {
                    numberOfBrackets--;
                }

                isEscapeActive = false;

                if (numberOfBrackets > 0)
                {
                    builder.Append(c);
                }

                // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                // numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);
                break;
            case '(':
                isLineBreaking = false;

                // An escaped bracket does not change the nesting depth.
                if (!isEscapeActive)
                {
                    numberOfBrackets++;
                }

                isEscapeActive = false;
                builder.Append(c);
                break;
            // Escape
            case '\\':
                isLineBreaking = false;

                if (isEscapeActive)
                {
                    // Escaped backslash: emit one literal backslash and end the escape, so the
                    // following character (for example the closing bracket) is not treated as escaped.
                    builder.Append(c);
                    isEscapeActive = false;
                }
                else
                {
                    isEscapeActive = true;
                }

                break;
            default:
                if (isLineBreaking)
                {
                    if (ReadHelper.IsEndOfLine(c))
                    {
                        continue;
                    }

                    isLineBreaking = false;
                    builder.Append(c);
                }
                else if (isEscapeActive)
                {
                    ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                    isEscapeActive = false;
                }
                else
                {
                    builder.Append(c);
                }

                break;
        }

        // The outermost bracket has been closed: the string is complete.
        if (numberOfBrackets <= 0)
        {
            break;
        }
    }

    token = new StringToken(builder.ToString());

    return true;
}