Ejemplo n.º 1
0
        /// <summary>
        /// Attempts to read a parenthesis-delimited string literal from the input and produce a <see cref="StringToken"/>.
        /// </summary>
        /// <remarks>
        /// Tracks nested parentheses, backslash escapes and octal character codes of up to three digits.
        /// End-of-line characters are skipped while the line-breaking flag (set by ProcessEscapedCharacter) is active.
        /// If the collected characters start with a UTF-16 byte order mark the content is decoded as UTF-16;
        /// otherwise it is returned unchanged and reported as ISO 8859-1.
        /// If the input runs out before the closing parenthesis, whatever was read so far is returned.
        /// </remarks>
        /// <param name="currentByte">The byte at the current position; tokenization only proceeds when it is '('.</param>
        /// <param name="inputBytes">The byte source to read the string content from.</param>
        /// <param name="token">The resulting string token, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns>
        /// <c>false</c> when <paramref name="inputBytes"/> is <c>null</c> or <paramref name="currentByte"/> is not '(';
        /// otherwise <c>true</c>.
        /// </returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (inputBytes == null || currentByte != '(')
            {
                return false;
            }

            // stringBuilder is declared outside this method and reused between calls. The finally block
            // below guarantees it is emptied even when reading or decoding throws, so a failed call
            // cannot leak partial text into the next one.
            var builder          = stringBuilder;
            var numberOfBrackets = 1;
            var isEscapeActive   = false;
            var isLineBreaking   = false;

            var octalModeActive = false;

            short[] octal      = { 0, 0, 0 };
            var     octalsRead = 0;

            try
            {
                while (inputBytes.MoveNext())
                {
                    var b = inputBytes.CurrentByte;
                    var c = (char)b;

                    if (octalModeActive)
                    {
                        var nextCharacterOctal = c >= '0' && c <= '7';

                        if (nextCharacterOctal)
                        {
                            // left shift the octals.
                            LeftShiftOctal(c, octalsRead, octal);
                            octalsRead++;
                        }

                        // The escape ends after three digits or at the first non-octal character.
                        if (octalsRead == 3 || !nextCharacterOctal)
                        {
                            var characterCode = OctalHelpers.FromOctalDigits(octal);

                            // An octal escape denotes a single byte. Three digits can encode up to 0x1FF,
                            // and the literal string rules (ISO 32000-1 section 7.3.4.2) state that
                            // high-order overflow is ignored, so keep only the low 8 bits.
                            builder.Append((char)(characterCode & 0xFF));

                            octal[0]        = 0;
                            octal[1]        = 0;
                            octal[2]        = 0;
                            octalsRead      = 0;
                            octalModeActive = false;
                        }

                        // An octal digit was consumed as part of the escape; a non-octal character
                        // falls through and is processed normally below.
                        if (nextCharacterOctal)
                        {
                            continue;
                        }
                    }

                    switch (c)
                    {
                    case ')':
                        isLineBreaking = false;

                        // An escaped ')' is literal text and does not close a bracket.
                        if (!isEscapeActive)
                        {
                            numberOfBrackets--;
                        }

                        isEscapeActive = false;

                        // The final closing bracket is the delimiter, not part of the content.
                        if (numberOfBrackets > 0)
                        {
                            builder.Append(c);
                        }

                        // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                        numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);

                        break;

                    case '(':
                        isLineBreaking = false;

                        // An escaped '(' is literal text and does not open a nested bracket.
                        if (!isEscapeActive)
                        {
                            numberOfBrackets++;
                        }

                        isEscapeActive = false;
                        builder.Append(c);
                        break;

                    // Escape
                    case '\\':
                        isLineBreaking = false;

                        if (isEscapeActive)
                        {
                            // Escaped backslash: emit one backslash and leave escape mode.
                            builder.Append(c);
                            isEscapeActive = false;
                        }
                        else
                        {
                            isEscapeActive = true;
                        }

                        break;

                    default:
                        if (isLineBreaking)
                        {
                            // Skip the remaining end-of-line characters of the line break.
                            if (ReadHelper.IsEndOfLine(c))
                            {
                                continue;
                            }

                            isLineBreaking = false;
                            builder.Append(c);
                        }
                        else if (isEscapeActive)
                        {
                            ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                            isEscapeActive = false;
                        }
                        else
                        {
                            builder.Append(c);
                        }

                        break;
                    }

                    // All brackets are balanced: the string literal is complete.
                    if (numberOfBrackets <= 0)
                    {
                        break;
                    }
                }

                StringToken.Encoding encodedWith;
                string tokenStr;

                // A byte order mark needs at least two characters.
                var canHaveByteOrderMark = builder.Length >= 2;

                if (canHaveByteOrderMark && builder[0] == 0xFE && builder[1] == 0xFF)
                {
                    var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

                    // Substring(1) drops the first decoded character, which corresponds to the byte order mark.
                    tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);

                    encodedWith = StringToken.Encoding.Utf16BE;
                }
                else if (canHaveByteOrderMark && builder[0] == 0xFF && builder[1] == 0xFE)
                {
                    var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

                    // Substring(1) drops the first decoded character, which corresponds to the byte order mark.
                    tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);

                    encodedWith = StringToken.Encoding.Utf16;
                }
                else
                {
                    // No byte order mark: use the collected characters unchanged.
                    tokenStr = builder.ToString();

                    encodedWith = StringToken.Encoding.Iso88591;
                }

                token = new StringToken(tokenStr, encodedWith);

                return true;
            }
            finally
            {
                builder.Clear();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Attempts to read a parenthesis-delimited string literal from the input and produce a <see cref="StringToken"/>.
        /// </summary>
        /// <remarks>
        /// Tracks nested parentheses, backslash escapes and octal character codes of up to three digits.
        /// End-of-line characters are skipped while the line-breaking flag (set by ProcessEscapedCharacter) is active.
        /// If the input runs out before the closing parenthesis, whatever was read so far is returned.
        /// </remarks>
        /// <param name="currentByte">The byte at the current position; tokenization only proceeds when it is '('.</param>
        /// <param name="inputBytes">The byte source to read the string content from.</param>
        /// <param name="token">The resulting string token, or <c>null</c> when the method returns <c>false</c>.</param>
        /// <returns>
        /// <c>false</c> when <paramref name="inputBytes"/> is <c>null</c> or <paramref name="currentByte"/> is not '(';
        /// otherwise <c>true</c>.
        /// </returns>
        public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
        {
            token = null;

            if (inputBytes == null || currentByte != '(')
            {
                return false;
            }

            // Allocated only after the guards so rejected input costs nothing.
            var builder = new StringBuilder();

            int  numberOfBrackets = 1;
            bool isEscapeActive   = false;
            bool isLineBreaking   = false;

            bool octalModeActive = false;

            short[] octal      = { 0, 0, 0 };
            int     octalsRead = 0;

            while (inputBytes.MoveNext())
            {
                var b = inputBytes.CurrentByte;
                var c = (char)b;

                if (octalModeActive)
                {
                    var nextCharacterOctal = c >= '0' && c <= '7';

                    if (nextCharacterOctal)
                    {
                        // left shift the octals.
                        LeftShiftOctal(c, octalsRead, octal);
                        octalsRead++;
                    }

                    // The escape ends after three digits or at the first non-octal character.
                    if (octalsRead == 3 || !nextCharacterOctal)
                    {
                        var characterCode = OctalHelpers.FromOctalDigits(octal);

                        // An octal escape denotes a single byte. Three digits can encode up to 0x1FF,
                        // and the literal string rules (ISO 32000-1 section 7.3.4.2) state that
                        // high-order overflow is ignored, so keep only the low 8 bits.
                        builder.Append((char)(characterCode & 0xFF));

                        octal[0]        = 0;
                        octal[1]        = 0;
                        octal[2]        = 0;
                        octalsRead      = 0;
                        octalModeActive = false;
                    }

                    // An octal digit was consumed as part of the escape; a non-octal character
                    // falls through and is processed normally below.
                    if (nextCharacterOctal)
                    {
                        continue;
                    }
                }

                switch (c)
                {
                case ')':
                    isLineBreaking = false;

                    // An escaped ')' is literal text and does not close a bracket.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets--;
                    }

                    isEscapeActive = false;

                    // The final closing bracket is the delimiter, not part of the content.
                    if (numberOfBrackets > 0)
                    {
                        builder.Append(c);
                    }

                    // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                    // numberOfBrackets = CheckForEndOfString(inputBytes, numberOfBrackets);

                    break;

                case '(':
                    isLineBreaking = false;

                    // An escaped '(' is literal text and does not open a nested bracket.
                    if (!isEscapeActive)
                    {
                        numberOfBrackets++;
                    }

                    isEscapeActive = false;
                    builder.Append(c);
                    break;

                // Escape
                case '\\':
                    isLineBreaking = false;

                    if (isEscapeActive)
                    {
                        // Escaped backslash: emit one backslash and leave escape mode. Without the reset
                        // the character after "\\" would still be treated as escaped, so a ')' directly
                        // following it would never close the string.
                        builder.Append(c);
                        isEscapeActive = false;
                    }
                    else
                    {
                        isEscapeActive = true;
                    }

                    break;

                default:
                    if (isLineBreaking)
                    {
                        // Skip the remaining end-of-line characters of the line break.
                        if (ReadHelper.IsEndOfLine(c))
                        {
                            continue;
                        }

                        isLineBreaking = false;
                        builder.Append(c);
                    }
                    else if (isEscapeActive)
                    {
                        ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                        isEscapeActive = false;
                    }
                    else
                    {
                        builder.Append(c);
                    }

                    break;
                }

                // All brackets are balanced: the string literal is complete.
                if (numberOfBrackets <= 0)
                {
                    break;
                }
            }

            token = new StringToken(builder.ToString());

            return true;
        }