Example #1
        public Token[] Tokenize(string str, out bool isValid)
        {
            List <Token> tokens       = new List <Token>();
            int          c            = 0;
            TokenMode    currentToken = blockStartToken;
            var          pathways     = currentToken.Pathways;

            while (true)
            {
                while (c < str.Length && char.IsWhiteSpace(str[c]))
                {
                    ++c;
                }
                if (c >= str.Length)
                {
                    break;
                }
                String rest  = str.Substring(c, str.Length - c);
                int    origC = c;
                foreach (var pathway in universalPathways.Concat(pathways))
                {
                    foreach (var pattern in pathway.Patterns)
                    {
                        var match = Regex.Match(rest, pattern);
                        if (match.Success && match.Value.Length > 0)
                        {
                            tokens.Add(new Token()
                            {
                                PointInString = c, String = match.Value, Mode = pathway
                            });
                            c           += match.Value.Length;
                            currentToken = pathway;
                            if (currentToken.Pathways != null)
                            {
                                pathways = currentToken.Pathways;
                            }
                            //Console.WriteLine(currentToken.Name);
                            goto done;
                        }
                    }
                }
done:
                if (origC == c)
                {
                    // If no '\r' follows, IndexOf returns -1, which would make the excerpt
                    // length negative; fall back to the end of the string instead.
                    int end = str.IndexOf('\r', c);
                    if (end < 0)
                    {
                        end = str.Length;
                    }
                    int cnt = Math.Min(end - c, 40);
                    Console.WriteLine("Unable to tokenize at " + c + ":\r\n" +
                                      "  " + str.Substring(c, cnt) + "\r\n" +
                                      "from <" + currentToken.Name + "> pattern " + currentToken);
                    break;
                }
            }
            isValid = (c == str.Length);
            return(tokens.ToArray());
        }
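The loop above skips whitespace, then tries every regex pattern reachable from the current token mode and consumes the first match, switching pathways as it goes; if nothing matches, it prints the offending position and stops. Below is a minimal, self-contained sketch of that same consume-the-first-match loop with anchored patterns. The token names and regexes are illustrative placeholders, not the project's pathway types or grammar.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

static class MiniTokenizer
{
    // Illustrative patterns only; the real tokenizer pulls these from its pathway graph.
    private static readonly (string Name, string Pattern)[] patterns =
    {
        ("number",     @"^\d+"),
        ("identifier", @"^[A-Za-z_]\w*"),
        ("symbol",     @"^[+\-*/=(){},;]")
    };

    public static List<(string Name, string Value)> Tokenize(string str, out bool isValid)
    {
        var tokens = new List<(string Name, string Value)>();
        int c = 0;
        while (c < str.Length)
        {
            if (char.IsWhiteSpace(str[c])) { ++c; continue; }
            string rest  = str.Substring(c);
            int    origC = c;
            foreach (var (name, pattern) in patterns)
            {
                Match match = Regex.Match(rest, pattern);
                if (match.Success && match.Value.Length > 0)
                {
                    tokens.Add((name, match.Value));
                    c += match.Value.Length;
                    break;                      // same role as the goto done above
                }
            }
            if (origC == c)
            {
                Console.WriteLine("Unable to tokenize at " + c);
                break;                          // nothing matched: give up
            }
        }
        isValid = c == str.Length;
        return tokens;
    }
}

The real method additionally records the match position in each Token and narrows the pattern set to the pathways reachable from the token it just matched.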
Example #2
 public static TokenMode GetBlockEndMode(TokenMode blockStart)
 {
     if (blockStart == parmStartToken)
     {
         return(parmEndToken);
     }
     if (blockStart == blockStartToken)
     {
         return(blockEndToken);
     }
     return(null);
 }
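GetBlockEndMode pairs each block-opening mode with the mode expected to close it. A self-contained sketch of the same lookup expressed as a dictionary; the Mode names here are illustrative, not the project's static token fields.

// Illustrative stand-in for the start/end pairing above; Mode and the
// dictionary entries are assumptions, not the project's TokenMode fields.
using System.Collections.Generic;

enum Mode { ParmStart, ParmEnd, BlockStart, BlockEnd }

static class BlockPairs
{
    private static readonly Dictionary<Mode, Mode> endForStart = new Dictionary<Mode, Mode>
    {
        { Mode.ParmStart,  Mode.ParmEnd  },
        { Mode.BlockStart, Mode.BlockEnd }
    };

    public static Mode? GetBlockEndMode(Mode blockStart)
    {
        return endForStart.TryGetValue(blockStart, out Mode end) ? end : (Mode?)null;
    }
}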
Example #3
 public GrammarElementT(TokenMode mode, Func <Token, bool> validCheck)
 {
     Mode = mode; ValidityCheck = validCheck;
 }
Example #4
 public GrammarElementT(TokenMode mode)
 {
     Mode = mode;
 }
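A hedged usage fragment for these two constructors: one grammar element that accepts any token of a given mode, and one that also validates the matched text with a lambda. The wordToken and numberToken fields are assumptions for illustration; Token.String is the property seen in Example #1.

// Hypothetical usage (wordToken and numberToken are assumed TokenMode fields):
var anyWord    = new GrammarElementT(wordToken);
var smallCount = new GrammarElementT(numberToken,
                                     t => int.TryParse(t.String, out int n) && n < 100);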
Example #5
File: Tokenizer.cs Project: djlw78/crayon
        public static Token[] Tokenize(FileScope file)
        {
            Localization.Locale locale = file.CompilationScope.Locale;
            string code = file.Content;

            // Add a newline and a dummy character at the end.
            // Set the length equal to the code with the newline but without the null terminator.
            // This makes dereferencing index + 1 simpler and makes the check for the end
            // of word tokens and single-line comments easy.
            code += "\n\0";
            int length = code.Length - 1;

            int[] lineByIndex = new int[code.Length];
            int[] colByIndex  = new int[code.Length];
            char  c;
            int   line = 0;
            int   col  = 0;

            for (int i = 0; i < code.Length; ++i)
            {
                c = code[i];
                lineByIndex[i] = line;
                colByIndex[i]  = col;
                if (c == '\n')
                {
                    ++line;
                    col = -1;
                }

                ++col;
            }

            List <Token> tokens = new List <Token>();

            TokenMode mode        = TokenMode.NORMAL;
            char      modeSubtype = ' ';
            int       tokenStart  = 0;
            string    tokenValue;
            char      c2;
            bool      isTokenEnd  = false;
            bool      stringIsRaw = false;

            for (int i = 0; i < length; ++i)
            {
                c = code[i];

                switch (mode)
                {
                case TokenMode.COMMENT:
                    if (modeSubtype == '*')
                    {
                        if (c == '*' && code[i + 1] == '/')
                        {
                            ++i;
                            mode = TokenMode.NORMAL;
                        }
                    }
                    else
                    {
                        if (c == '\n')
                        {
                            mode = TokenMode.NORMAL;
                        }
                    }
                    break;

                case TokenMode.NORMAL:
                    if (WHITESPACE.Contains(c))
                    {
                        // do nothing
                    }
                    else if (c == '/' && (code[i + 1] == '/' || code[i + 1] == '*'))
                    {
                        mode        = TokenMode.COMMENT;
                        modeSubtype = code[++i];
                    }
                    else if (IsIdentifierChar(c))
                    {
                        tokenStart = i;
                        mode       = TokenMode.WORD;
                    }
                    else if (c == '"' || c == '\'')
                    {
                        tokenStart  = i;
                        mode        = TokenMode.STRING;
                        modeSubtype = c;
                        stringIsRaw = tokens.Count > 0 && tokens[tokens.Count - 1].Value == "@";
                    }
                    else
                    {
                        if (c == '.')
                        {
                            c2 = code[i + 1];
                            if (c2 >= '0' && c2 <= '9')
                            {
                                mode       = TokenMode.WORD;
                                tokenStart = i++;
                            }
                        }

                        if (mode == TokenMode.NORMAL)
                        {
                            tokens.Add(new Token(c.ToString(), TokenType.PUNCTUATION, file, lineByIndex[i], colByIndex[i]));
                        }
                    }
                    break;

                case TokenMode.STRING:
                    if (c == modeSubtype)
                    {
                        tokenValue = code.Substring(tokenStart, i - tokenStart + 1);
                        tokens.Add(new Token(tokenValue, TokenType.STRING, file, lineByIndex[i], colByIndex[i]));
                        mode = TokenMode.NORMAL;
                    }
                    else if (!stringIsRaw && c == '\\')
                    {
                        ++i;
                    }
                    break;

                case TokenMode.WORD:
                    isTokenEnd = false;
                    if (IsIdentifierChar(c))
                    {
                        // do nothing
                    }
                    else if (c == '.')
                    {
                        if (code[tokenStart] >= '0' && code[tokenStart] <= '9')
                        {
                            // do nothing
                        }
                        else
                        {
                            isTokenEnd = true;
                        }
                    }
                    else
                    {
                        isTokenEnd = true;
                    }

                    if (isTokenEnd)
                    {
                        tokenValue = code.Substring(tokenStart, i - tokenStart);
                        c          = tokenValue[0];
                        TokenType type = TokenType.WORD;
                        if ((c >= '0' && c <= '9') || c == '.')
                        {
                            type = TokenType.NUMBER;
                        }
                        else if (!locale.Keywords.IsValidVariable(tokenValue))
                        {
                            type = TokenType.KEYWORD;
                        }
                        tokens.Add(new Token(tokenValue, type, file, lineByIndex[tokenStart], colByIndex[tokenStart]));
                        mode = TokenMode.NORMAL;
                        --i;
                    }
                    break;
                }
            }

            switch (mode)
            {
            case TokenMode.COMMENT:
                throw new ParserException(file, "There is an unclosed comment in this file.");

            case TokenMode.STRING:
                throw new ParserException(file, "There is an unclosed string in this file.");

            case TokenMode.WORD:
                throw new System.InvalidOperationException();

            default:
                break;
            }

            return(tokens.ToArray());
        }
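The first pass in this method precomputes the line and column of every character index so each emitted token can be stamped with its position in constant time. A self-contained sketch of just that pass, pulled out as a standalone helper for illustration:

// Standalone sketch of the position-table pass above: one scan over the source
// records, for every index, the line and column that character sits on.
static void BuildPositionTables(string code, out int[] lineByIndex, out int[] colByIndex)
{
    lineByIndex = new int[code.Length];
    colByIndex  = new int[code.Length];
    int line = 0;
    int col  = 0;
    for (int i = 0; i < code.Length; ++i)
    {
        lineByIndex[i] = line;
        colByIndex[i]  = col;
        if (code[i] == '\n')
        {
            ++line;
            col = -1;   // reset; the ++col below brings the next line back to column 0
        }
        ++col;
    }
}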
Example #6
        private MethodDef(IMethodDef original, string name            = null, string returnType = null, XmlElement summary = null, XmlElement returnDescription = null, IException exception = null,
                          ReadOnlyCollection <GenericArg> genericArgs = null, ReadOnlyCollection <Parameter> parameters = null, XmlElement bodyElement = null, XmlElement syncBodyElement = null,
                          XmlElement asyncBodyElement = null, XmlElement syncAsyncBodyElement = null, XmlElement streamBodyElement = null,
                          XmlElement tokenBodyElement = null, string overload = null, bool? needsStream = null, bool? streamSerial = null, bool? needsAsync = null, TokenMode tokenMode = TokenMode.None)
        {
            methodDefXml = original.GetMethodDefXml();

            Name                 = name ?? original.Name;
            ReturnType           = returnType ?? original.ReturnType;
            Summary              = summary ?? original.Summary;
            ReturnDescription    = returnDescription ?? original.ReturnDescription;
            Exception            = exception ?? (original.Exception != null ? new Exception(original.Exception) : null);
            GenericArgs          = genericArgs != null && genericArgs.Count > 0 ? genericArgs : original.GenericArgs?.Select(a => new GenericArg(a)).ToReadOnlyList();
            Parameters           = parameters != null && parameters.Count > 0 ? parameters : original.Parameters?.Select(p => new Parameter(p)).ToReadOnlyList();
            BodyElement          = bodyElement ?? (XmlElement)original.BodyElement?.CloneNode(true);
            SyncBodyElement      = syncBodyElement ?? (XmlElement)original.SyncBodyElement?.CloneNode(true);
            AsyncBodyElement     = asyncBodyElement ?? (XmlElement)original.AsyncBodyElement?.CloneNode(true);
            SyncAsyncBodyElement = syncAsyncBodyElement ?? (XmlElement)original.SyncAsyncBodyElement?.CloneNode(true);
            StreamBodyElement    = streamBodyElement ?? (XmlElement)original.StreamBodyElement?.CloneNode(true);
            TokenBodyElement     = tokenBodyElement ?? (XmlElement)original.TokenBodyElement?.CloneNode(true);
            Overload             = overload ?? original.Overload;
            NeedsStream          = needsStream.HasValue ? needsStream.Value : original.NeedsStream;
            NeedsAsync           = needsAsync.HasValue ? needsAsync.Value : original.NeedsAsync;
            TokenMode            = tokenMode != TokenMode.None ? tokenMode : original.TokenMode;
        }
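This private constructor implements a copy-with-overrides pattern: every parameter defaults to "not supplied" (null, or TokenMode.None) and falls back to the corresponding value on original, cloning the XmlElement bodies so the copy does not alias the source document. A small self-contained sketch of the same idiom; the MethodInfoLite type and its properties are illustrative, not part of the project:

// Self-contained sketch of the copy-with-overrides idiom used above; the
// MethodInfoLite type and its members are assumptions for illustration.
class MethodInfoLite
{
    public string Name { get; }
    public string ReturnType { get; }
    public bool NeedsAsync { get; }

    public MethodInfoLite(string name, string returnType, bool needsAsync)
    {
        Name = name; ReturnType = returnType; NeedsAsync = needsAsync;
    }

    // Copy constructor: any argument left at its default keeps the original's value,
    // mirroring the null-coalescing and HasValue checks in the constructor above.
    public MethodInfoLite(MethodInfoLite original, string name = null,
                          string returnType = null, bool? needsAsync = null)
    {
        Name       = name ?? original.Name;
        ReturnType = returnType ?? original.ReturnType;
        NeedsAsync = needsAsync.HasValue ? needsAsync.Value : original.NeedsAsync;
    }
}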