Beispiel #1
0
        /// <summary>
        /// Marks <paramref name="token"/> as an error when its content is rejected by
        /// <c>Value.IsNumber</c>; a token with acceptable content is left untouched.
        /// </summary>
        /// <param name="token">The number token whose content is validated.</param>
        private static void CheckNumber(RawToken token)
        {
            bool recognized = Value.IsNumber(token.Content);

            if (!recognized)
            {
                // Record the failure together with the offending text as the message parameter
                token.Error        = true;
                token.ErrorMessage = Messages.UnrecognizedNumber;
                token.ErrorParams  = new object[] { token.Content };
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a token located on <paramref name="line"/> from the data carried by a raw token.
        /// </summary>
        /// <param name="line">The line the token sits on; its <c>Page</c> becomes the token's page.</param>
        /// <param name="index">The value stored in <c>Index</c>.</param>
        /// <param name="token">The raw token supplying the column span, type and content.</param>
        public Token(PageLine line, int index, RawToken token)
        {
            // Where the token lives
            this.Page  = line.Page;
            this.Line  = line;
            this.Index = index;

            // Column span copied from the raw token
            this.ColumnStart = token.ColumnStart;
            this.ColumnEnd   = token.ColumnEnd;

            // What the token is; hints always start out empty
            this.Type    = token.Type;
            this.Hints   = TokenHints.None;
            this.Content = token.Content;
        }
Beispiel #3
0
        /// <summary>
        /// Splits a single line of text into raw tokens.
        /// </summary>
        /// <remarks>
        /// Whitespace between tokens is skipped. Each token records the index at which it
        /// starts (<c>ColumnStart</c>) and the index reached once it has been consumed
        /// (<c>ColumnEnd</c>). Runs of unrecognised characters are emitted as error tokens of
        /// type <c>TokenType.Unknown</c>, and number tokens are validated through
        /// <c>CheckNumber</c>, so problems are reported on the tokens rather than by this method.
        /// String tokens exclude their surrounding quote characters from <c>Content</c>.
        /// A comment token has no terminating condition and therefore runs to the end of the line.
        /// </remarks>
        /// <param name="line">The text to tokenize. An empty string yields an empty token list.</param>
        /// <param name="tokens">Receives the tokens in the order they occur in <paramref name="line"/>.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown if a token prototype has no matching completed token type (an internal logic error).
        /// </exception>
        public static void TryParse(string line, out IList <RawToken> tokens)
        {
            // A big function with a goto statement - I had to try it at least once, for old time's sake

            int             index     = 0;
            List <RawToken> tokenList = new List <RawToken>();

            bool hasCurrent  = index < line.Length;

            // Only read the first char when there is one: an empty line has nothing at index 0
            char currentChar = hasCurrent ? line[index] : '\0';

            // Advances to the next char, refreshing hasCurrent/currentChar
            void Step()
            {
                index++;
                hasCurrent = index < line.Length;
                if (hasCurrent)
                {
                    currentChar = line[index];
                }
            }

            RawToken currentToken;

            // Walk the string one char at a time

            while (hasCurrent)
            {
                if (char.IsWhiteSpace(currentChar))
                {
                    Step();
                    continue;
                }

                // While there are still chars left, follow a three step process

                currentToken = new RawToken
                {
                    ColumnStart = index,
                    Content     = ""
                };

                char stringBoundary = '"';

                // Step 1: identify the current char as being the start of a certain type of token
                // (Or emit an error token if the char is unexpected)

                // Some token types look identical to begin with - these share the same token "prototype"
                // For example, a keyword, type and variable name all share the prototype "Word"

                TokenPrototype?prototype = null;

                if (Symbols.IsKeyCharacter(currentChar))
                {
                    prototype = TokenPrototype.KeySymbol;
                }
                else if (Symbols.IsWordStart(currentChar))
                {
                    prototype = TokenPrototype.Word;
                }
                else if (Symbols.IsNumberStart(currentChar))
                {
                    prototype = TokenPrototype.Number;
                }
                else if (Symbols.IsStringStart(currentChar))
                {
                    prototype = TokenPrototype.String;
                }
                else if (Symbols.IsOperatorStart(currentChar))
                {
                    prototype = TokenPrototype.Operator;
                }
                else if (Symbols.IsCommentStart(currentChar))
                {
                    prototype = TokenPrototype.Comment;
                }

                if (prototype.HasValue)
                {
                    // Special case for strings: the first char will be a "string start"
                    // char (" or ') not part of the string
                    if (prototype == TokenPrototype.String)
                    {
                        // We do need to know which of " or ' it is for later
                        stringBoundary = currentChar;
                    }
                    else
                    {
                        currentToken.Content = currentChar.ToString();
                    }
                }
                else
                {
                    // Unrecognized characters are collected into a single error token,
                    // which lets the scan carry on with the rest of the line

                    currentToken.Error        = true;
                    currentToken.ErrorMessage = Messages.UnrecognizedSymbols;
                    currentToken.Type         = TokenType.Unknown;
                    currentToken.Content      = "";

                    // Always consume the offending char first so the outer loop is guaranteed
                    // to advance, then absorb any further unknown chars into the same token
                    do
                    {
                        currentToken.Content += currentChar;
                        Step();
                    }
                    while (hasCurrent && Symbols.IsUnkownStart(currentChar));

                    currentToken.ColumnEnd = index;

                    tokenList.Add(currentToken);

                    continue;
                }

                // Step 2: keep stepping forward while the chars match the interior of
                // the current token prototype
                // (Append the current char to the token content at each step)

                Step();

                while (hasCurrent)
                {
                    switch (prototype)
                    {
                    case TokenPrototype.Word when !Symbols.IsWordCharacter(currentChar):
                    case TokenPrototype.Number when !Symbols.IsNumberCharacter(currentChar):
                    case TokenPrototype.Operator when !Symbols.IsOperatorCharacter(currentChar):
                    // Special case for key symbols: they have no interior
                    case TokenPrototype.KeySymbol:
                    // Special case for strings: two ends possible, it must match the start
                    case TokenPrototype.String when stringBoundary == currentChar:
                        goto CompleteToken;

                    default: break;
                    }

                    currentToken.Content += currentChar;
                    Step();
                }

                CompleteToken :;

                // Special case for strings: the last char will be the "string end"
                // char (" or '), not part of the string.
                // Only skip it when it is actually there: an unterminated string ends at the
                // end of the line, and stepping again would push the index past the line end.
                if (prototype == TokenPrototype.String && hasCurrent)
                {
                    Step();
                }

                // Step 3: Identify the full type of the token from the completed content,
                // and add the completed token to the list.
                // (Throw if the prototype cannot be mapped to a token type)

                currentToken.ColumnEnd = index;
                TokenType completedType;

                string content = currentToken.Content;

                switch (prototype)
                {
                // Some words have immediately recognizable meaning

                case TokenPrototype.Word when Operator.IsOperator(text: content) : completedType = TokenType.Operator; break;

                case TokenPrototype.Word when Keywords.IsKeyword(content): completedType = TokenType.KeyWord; break;

                case TokenPrototype.Word when SprakType.IsType(content): completedType = TokenType.Type; break;

                case TokenPrototype.Word when Value.IsBoolean(content): completedType = TokenType.Boolean; break;

                // The rest can only be checked for validity at a higher level

                case TokenPrototype.Word: completedType = TokenType.Name; break;

                // Numbers can be recognized as valid/invalid straight away.

                case TokenPrototype.Number:
                    completedType = TokenType.Number;
                    CheckNumber(currentToken);
                    break;

                // These will always be valid tokens

                // Technically, operator tokens might not represent a real
                // operator, but fully checking that requires more context and
                // is left until later.

                case TokenPrototype.Operator: completedType = TokenType.Operator; break;

                case TokenPrototype.KeySymbol: completedType = TokenType.KeySymbol; break;

                case TokenPrototype.String: completedType = TokenType.String; break;

                case TokenPrototype.Comment: completedType = TokenType.Comment; break;

                // Ideally, this can't happen, unless I've made a mistake somewhere above

                default:
                    string prototypeName = Enum.GetName(typeof(TokenPrototype), prototype);
                    string message       = $"Token type not recognised: [{prototypeName}:{content}]";
                    throw new NotImplementedException(message);
                }


                currentToken.Type = completedType;
                tokenList.Add(currentToken);
            } // end main while

            tokens = tokenList;
        }
Beispiel #4
0
        /// <summary>
        /// Attempts to interpret a sequence of raw tokens as a literal value.
        /// </summary>
        /// <remarks>
        /// A single token is converted when it is a boolean, number or string token.
        /// A longer sequence is treated as a bracketed, comma-separated list: it must start with
        /// an opening square bracket and end with a closing one, and each comma-separated run of
        /// tokens in between is parsed recursively.
        /// NOTE(review): no list-valued <c>Value</c> type is visible from this method, so the
        /// parsed items are validated but no list result is produced yet and the method still
        /// returns <c>false</c> for multi-token input - confirm the intended type and finish this.
        /// </remarks>
        /// <param name="tokens">The tokens to interpret.</param>
        /// <param name="result">Receives the parsed value on success; <c>null</c> otherwise.</param>
        /// <returns><c>true</c> if a value was produced; otherwise <c>false</c>.</returns>
        private static bool TryParse(IList <RawToken> tokens, out Value result)
        {
            result = null;

            if (tokens.Count == 0)
            {
                return(false);
            }

            if (tokens.Count == 1)
            {
                RawToken token   = tokens[0];
                string   content = token.Content;

                switch (token.Type)
                {
                case TokenType.Boolean
                    when SprakBoolean.TryParse(content, out SprakBoolean raw):
                    result = raw;

                    return(true);

                case TokenType.Number
                    when SprakNumber.TryParse(content, out SprakNumber raw):
                    result = raw;

                    return(true);

                case TokenType.String:
                    result = new SprakString(content);
                    return(true);
                }

                // Any other token type, or content that failed to parse
                return(false);
            }

            RawToken first = tokens[0];
            RawToken last  = tokens[tokens.Count - 1];

            if (first.Type != TokenType.KeySymbol || first.Content[0] != Symbols.OpenSquareBracket)
            {
                return(false);
            }

            // The closing bracket must be checked on the LAST token, not the first
            if (last.Type != TokenType.KeySymbol || last.Content[0] != Symbols.CloseSquareBracket)
            {
                return(false);
            }

            List <Value>    items   = new List <Value>();
            List <RawToken> current = new List <RawToken>();

            // Walk the tokens strictly between the two brackets
            for (int i = 1; i < tokens.Count - 1; i++)
            {
                if (tokens[i].Type == TokenType.KeySymbol && tokens[i].Content[0] == Symbols.Comma)
                {
                    // Parse only the tokens gathered for this element; recursing on the
                    // whole sequence would never terminate
                    if (!TryParse(current, out Value item))
                    {
                        return(false);
                    }

                    items.Add(item);
                    current.Clear();
                }
                else
                {
                    current.Add(tokens[i]);
                }
            }

            // The element after the final comma (or the only element) is still pending
            if (current.Count > 0)
            {
                if (!TryParse(current, out Value lastItem))
                {
                    return(false);
                }

                items.Add(lastItem);
            }

            // TODO(review): build the list value from 'items' and return true once the
            // list-valued Value type is known; until then the result stays null.
            return(false);
        }