Пример #1
0
        //This is the main method for the Tokenizing assignment.
        //Takes a list of code lines, and returns a list of tokens.
        //For each token you must identify its type, and instantiate the correct subclass accordingly.
        //You need to identify the token position in the file (line, index within the line).
        //You also need to identify errors; in this assignment, that means illegal identifier names.
        public List <Token> Tokenize(List <string> lCodeLines)
        {
            // Characters handed to Next as token boundaries.
            char[]       boundaries = { ' ', ',', ';', '*', '+', '-', '/', '<', '>', '&', '=', '|', '!', '(', ')', '[', ']', '{', '}', '\t' };
            List <Token> lTokens    = new List <Token>();

            for (int lineNo = 0; lineNo < lCodeLines.Count; lineNo++)
            {
                string rest   = lCodeLines[lineNo];
                int    column = 0;

                while (!string.IsNullOrEmpty(rest))
                {
                    // A "//" at the head of the unread text ends the processing of this line.
                    if (rest.Length > 1 && rest[0] == '/' && rest[1] == '/')
                    {
                        break;
                    }

                    string text;
                    int    consumed; // filled in by Next but not needed here
                    rest = Next(rest, boundaries, out text, out consumed);

                    // Remember where this piece starts, then move the column past it.
                    int start = column;
                    column += text.Length;

                    // Blanks, tabs and empty pieces never become tokens.
                    if (text == " " || text == "\t" || text.Length == 0)
                    {
                        continue;
                    }

                    char  head         = text[0];
                    bool  headIsLetter = (head >= 'a' && head <= 'z') || (head >= 'A' && head <= 'Z');
                    Token produced     = null;

                    if (text.Length == 1)
                    {
                        // Single characters: bracket, operator, separator, digit, or a one-letter identifier.
                        if (Token.Parentheses.Contains(head))
                        {
                            produced = new Parentheses(head, lineNo, start);
                        }
                        else if (Token.Operators.Contains(head))
                        {
                            produced = new Operator(head, lineNo, start);
                        }
                        else if (Token.Separators.Contains(head))
                        {
                            produced = new Separator(head, lineNo, start);
                        }
                        else if (Token.Numbers.Contains(head))
                        {
                            produced = new Number(text, lineNo, start);
                        }
                        else if (headIsLetter)
                        {
                            produced = new Identifier(text, lineNo, start);
                        }
                    }
                    else
                    {
                        // Longer pieces: keyword tables first, then numbers, then identifiers.
                        if (Token.Statements.Contains(text))
                        {
                            produced = new Statement(text, lineNo, start);
                        }
                        else if (Token.VarTypes.Contains(text))
                        {
                            produced = new VarType(text, lineNo, start);
                        }
                        else if (Token.Constants.Contains(text))
                        {
                            produced = new Constant(text, lineNo, start);
                        }
                        else if (isNumber(text))
                        {
                            produced = new Number(text, lineNo, start);
                        }
                        else if (headIsLetter)
                        {
                            // After the leading letter only ASCII letters and characters from Token.Numbers are accepted.
                            bool legal = true;
                            for (int k = 1; legal && k < text.Length; k++)
                            {
                                char ch = text[k];
                                legal = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || Token.Numbers.Contains(ch);
                            }
                            if (legal)
                            {
                                produced = new Identifier(text, lineNo, start);
                            }
                        }
                    }

                    // Nothing matched: report the location of the offending piece.
                    if (produced == null)
                    {
                        Token offender = new Token();
                        offender.Line     = lineNo;
                        offender.Position = start;
                        throw new SyntaxErrorException("syntaxError", offender);
                    }

                    lTokens.Add(produced);
                }
            }

            return(lTokens);
        }
Пример #2
0
        // Tokenizes one line of source text.
        //   sLine - the text of the line.
        //   iLine - the line number recorded in every token that is created.
        // Returns the tokens of the line in the order they appear; blanks and tabs produce no token.
        // Throws SyntaxErrorException when a piece of text is not a keyword, a known symbol,
        // a number or a legal identifier; the attached token carries the line and the start
        // position of the offending text.
        public List <Token> Tokenize(string sLine, int iLine)
        {
            List <Token> lTokens = new List <Token>();

            if (string.IsNullOrEmpty(sLine))
            {
                return(lTokens);
            }

            // '\t' is a delimiter so that a tab anywhere in the line is isolated and skipped like a blank.
            // Assumes Split returns every delimiter as its own one-character piece, which the
            // blank check below has always relied on.
            char[] Delimiters = { '*', '+', '-', '/', '<', '>', '&', '=', '|', '~', ',', ';', ' ', '\t', '(', ')', '[', ']', '{', '}' };

            int position = 0; // index within sLine of the piece being classified
            int isnumber;

            foreach (string text in Split(sLine, Delimiters))
            {
                int start = position;
                position += text.Length;

                // White space only moves the position forward.
                if (text.Equals(" ") || text.Equals("\t"))
                {
                    continue;
                }

                // Keyword tables are consulted before anything else.
                if (Token.Statements.Contains(text))
                {
                    lTokens.Add(new Statement(text, iLine, start));
                    continue;
                }
                if (Token.VarTypes.Contains(text))
                {
                    lTokens.Add(new VarType(text, iLine, start));
                    continue;
                }
                if (Token.Constants.Contains(text))
                {
                    lTokens.Add(new Constant(text, iLine, start));
                    continue;
                }

                // Single-character symbols.
                if (text.Length == 1)
                {
                    char c = text[0];
                    if (Token.Operators.Contains(c))
                    {
                        lTokens.Add(new Operator(c, iLine, start));
                        continue;
                    }
                    if (Token.Parentheses.Contains(c))
                    {
                        lTokens.Add(new Parentheses(c, iLine, start));
                        continue;
                    }
                    if (Token.Separators.Contains(c))
                    {
                        lTokens.Add(new Separator(c, iLine, start));
                        continue;
                    }
                }

                if (int.TryParse(text, out isnumber))
                {
                    lTokens.Add(new Number(text, iLine, start));
                    continue;
                }

                // An identifier is made of letters, digits and '_' and must not start with a digit.
                if (Regex.IsMatch(text, @"^[a-zA-Z_][a-zA-Z0-9_]*$"))
                {
                    lTokens.Add(new Identifier(text, iLine, start));
                    continue;
                }

                // Nothing matched: report where the offending text begins.
                Token problematic = new Token();
                problematic.Line     = iLine;
                problematic.Position = start;
                throw new SyntaxErrorException("syntax token is invalid", problematic);
            }

            return(lTokens);
        }
Пример #3
0
        // Splits each line by the delimiters, then decides for each resulting symbol which token type is relevant.
        public List <Token> Tokenize(List <string> lCodeLines)
        {
            // Characters handed to Split as token boundaries.
            char[]       boundaries = { '(', ')', '{', '}', ',', ';', '*', '+', '<', '>', '=', '-', '\t', ' ', '/', '[', ']', '!', '\n' };
            List <Token> lTokens    = new List <Token>();

            for (int row = 0; row < lCodeLines.Count; row++)
            {
                // Only the text in front of the first "//" is tokenized.
                string code = Regex.Split(lCodeLines[row], "//")[0];
                if (code.Length == 0 || code.StartsWith("//"))
                {
                    continue;
                }

                int column = 0;
                foreach (string piece in Split(code, boundaries))
                {
                    // White-space pieces advance the column by one and produce no token.
                    if (piece.StartsWith("\t") || piece.StartsWith(" ") || piece.StartsWith("\n"))
                    {
                        column++;
                        continue;
                    }

                    bool  single = piece.Length == 1;
                    int   ignored;
                    Token made;

                    // Keyword tables first, then one-character symbols, then numbers;
                    // anything left over is treated as an identifier.
                    if (Token.Statements.Contains(piece))
                    {
                        made = new Statement(piece, row, column);
                    }
                    else if (Token.VarTypes.Contains(piece))
                    {
                        made = new VarType(piece, row, column);
                    }
                    else if (Token.Constants.Contains(piece))
                    {
                        made = new Constant(piece, row, column);
                    }
                    else if (single && Token.Operators.Contains(piece[0]))
                    {
                        made = new Operator(piece[0], row, column);
                    }
                    else if (single && Token.Parentheses.Contains(piece[0]))
                    {
                        made = new Parentheses(piece[0], row, column);
                    }
                    else if (single && Token.Separators.Contains(piece[0]))
                    {
                        made = new Separator(piece[0], row, column);
                    }
                    else if (int.TryParse(piece, out ignored))
                    {
                        made = new Number(piece, row, column);
                    }
                    else
                    {
                        made = new Identifier(piece, row, column);
                    }

                    lTokens.Add(made);
                    column += piece.Length;
                }
            }

            return(lTokens);
        }
Пример #4
0
        //This is the main method for the Tokenizing assignment.
        //Takes a list of code lines, and returns a list of tokens.
        //For each token you must identify its type, and instantiate the correct subclass accordingly.
        //You need to identify the token position in the file (line, index within the line).
        //You also need to identify errors; in this assignment, that means illegal identifier names.
        // Returns the tokens of all lines in order. Each token records its line index and the
        // index within the line at which it starts. Text from "//" to the end of a line is
        // ignored; code in front of the "//" is still tokenized.
        // Throws SyntaxErrorException ("problem") for text that cannot be classified; the
        // attached token carries the line and start position of that text.
        public List <Token> Tokenize(List <string> lCodeLines)
        {
            List <Token> lTokens   = new List <Token>();
            int          numOfLine = 0;

            foreach (string line in lCodeLines)
            {
                // Cut the comment off instead of discarding the whole line.
                int    commentStart = line.IndexOf("//", System.StringComparison.Ordinal);
                string code         = commentStart < 0 ? line : line.Substring(0, commentStart);
                int    placeOfToken = 0;

                foreach (string token in Split(code, Delimiters))
                {
                    // Guard against empty pieces so the token[0] accesses below are safe.
                    if (token.Length == 0)
                    {
                        continue;
                    }

                    int start = placeOfToken;
                    placeOfToken += token.Length;

                    // Blanks and tabs only move the position forward.
                    if (token == " " || token == "\t")
                    {
                        continue;
                    }

                    int  help;
                    bool recognised = true;

                    if (int.TryParse(token, out help))
                    {
                        lTokens.Add(new Number(token, numOfLine, start));
                    }
                    else if (token.Length == 1)
                    {
                        char symbol = token[0];
                        if (Token.Operators.Contains(symbol))
                        {
                            lTokens.Add(new Operator(symbol, numOfLine, start));
                        }
                        else if (Token.Parentheses.Contains(symbol))
                        {
                            lTokens.Add(new Parentheses(symbol, numOfLine, start));
                        }
                        else if (Token.Separators.Contains(symbol))
                        {
                            lTokens.Add(new Separator(symbol, numOfLine, start));
                        }
                        // NOTE(review): any single character other than a digit or '#' is accepted as an
                        // identifier here, while longer names must start with a letter - confirm intended.
                        else if (!char.IsDigit(symbol) && symbol != '#')
                        {
                            lTokens.Add(new Identifier(token, numOfLine, start));
                        }
                        else
                        {
                            recognised = false;
                        }
                    }
                    else if (Token.Statements.Contains(token))
                    {
                        lTokens.Add(new Statement(token, numOfLine, start));
                    }
                    else if (Token.VarTypes.Contains(token))
                    {
                        lTokens.Add(new VarType(token, numOfLine, start));
                    }
                    else if (Token.Constants.Contains(token))
                    {
                        lTokens.Add(new Constant(token, numOfLine, start));
                    }
                    // NOTE(review): only the first character of a longer name is validated - confirm
                    // whether the remaining characters should be checked as well.
                    else if (char.IsLetter(token[0]))
                    {
                        lTokens.Add(new Identifier(token, numOfLine, start));
                    }
                    else
                    {
                        recognised = false;
                    }

                    if (!recognised)
                    {
                        Token tmp = new Token();
                        tmp.Line     = numOfLine;
                        tmp.Position = start;
                        throw new SyntaxErrorException("problem", tmp);
                    }
                }

                numOfLine++;
            }

            return(lTokens);
        }
Пример #5
0
        // Returns the tokens of all lines in order. Each token records its line index and the
        // index within the line at which it starts. Text from "//" to the end of a line is
        // ignored; code in front of the "//" is still tokenized.
        // Throws SyntaxErrorException ("Wrong Syntax") for text that cannot be classified, e.g. a
        // name that starts with a digit; the attached token starts at the offending text.
        public List <Token> Tokenize(List <string> lCodeLines)
        {
            List <Token> lTokens = new List <Token>();
            int          lineNum = 0;

            foreach (string Line in lCodeLines)
            {
                // Cut the comment off instead of discarding the whole line.
                int    commentStart = Line.IndexOf("//", System.StringComparison.Ordinal);
                string code         = commentStart < 0 ? Line : Line.Substring(0, commentStart);
                int    pos          = 0;

                foreach (string lineToken in Split(code, Delimiters))
                {
                    // Guard against empty pieces so the lineToken[0] access below is safe.
                    if (lineToken.Length == 0)
                    {
                        continue;
                    }

                    // Every piece advances the position, whether or not it yields a token.
                    int start = pos;
                    pos += lineToken.Length;
                    char first = lineToken[0];

                    if (Contains(Statements, lineToken))
                    {
                        lTokens.Add(new Statement(lineToken, lineNum, start));
                    }
                    else if (Contains(VarTypes, lineToken))
                    {
                        lTokens.Add(new VarType(lineToken, lineNum, start));
                    }
                    else if (Contains(Constants, lineToken))
                    {
                        lTokens.Add(new Constant(lineToken, lineNum, start));
                    }
                    // NOTE(review): only the first character of a name is validated - confirm
                    // whether the remaining characters should be checked as well.
                    else if (char.IsLetter(first))
                    {
                        lTokens.Add(new Identifier(lineToken, lineNum, start));
                    }
                    else if (char.IsDigit(first))
                    {
                        // A number must consist of digits only; "1abc" is an illegal identifier.
                        bool allDigits = true;
                        for (int k = 1; allDigits && k < lineToken.Length; k++)
                        {
                            allDigits = char.IsDigit(lineToken[k]);
                        }
                        if (!allDigits)
                        {
                            // NOTE(review): the illegal text is wrapped in an Identifier because that is the
                            // string-based token type already used here - confirm the preferred carrier.
                            throw new SyntaxErrorException("Wrong Syntax", new Identifier(lineToken, lineNum, start));
                        }
                        lTokens.Add(new Number(lineToken, lineNum, start));
                    }
                    else if (lineToken.Length > 1)
                    {
                        // Longer text that starts with neither a letter nor a digit used to be dropped silently.
                        throw new SyntaxErrorException("Wrong Syntax", new Identifier(lineToken, lineNum, start));
                    }
                    else if (first == ' ' || first == '\t')
                    {
                        // White space: the position was already advanced above.
                    }
                    else if (Contains(Operators, first))
                    {
                        lTokens.Add(new Operator(first, lineNum, start));
                    }
                    else if (Contains(Parentheses, first))
                    {
                        lTokens.Add(new Parentheses(first, lineNum, start));
                    }
                    else if (Contains(Separators, first))
                    {
                        lTokens.Add(new Separator(first, lineNum, start));
                    }
                    else if (!Contains(Delimiters, first))
                    {
                        throw new SyntaxErrorException("Wrong Syntax", new Separator(first, lineNum, start));
                    }
                }

                lineNum++;
            }

            return(lTokens);
        }