/// <summary>
/// Tokenizes the source into per-line token collections, starting the line
/// counter at <paramref name="startLine"/>. Skips whitespace and comments,
/// recognizes both ASCII and full-width (CJK) punctuation variants, and
/// flushes the current line's tokens into <c>lineTokens</c> each time a line
/// break is consumed.
/// </summary>
/// <param name="sr">Reader supplying the source characters.</param>
/// <param name="fileContext">File context passed through to <c>Reset</c>.</param>
/// <param name="startLine">Line number to begin counting from.</param>
/// <returns>The accumulated per-line token collections (<c>lineTokens</c>).</returns>
public List<LineTokenCollection> Scan(SourceReader sr, ContextFile fileContext, int startLine)
{
    Reset(sr, fileContext);
    line = startLine;
    curToks = new LineTokenCollection();

    while (ch != END)
    {
        // One-character lookahead; GetNext() appears to be a pure peek
        // (it is reused freely within an iteration) — TODO confirm.
        char nextChar = GetNext();

        if (ch == ' ' || ch == '\t')
        {
            SkipWhiteSpace();
        }
        else if (ch == '/' && nextChar == '/')
        {
            SkipSingleLineComment();
        }
        else if (ch == '/' && nextChar == '*')
        {
            SkipMutilLineComment();
        }
        else if (ch == '/')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.DIV));
            Next();
        }
        else if (ch == '"' || ch == '“' || ch == '”')
        {
            // scanString() consumes through the closing quote; col - 1 points
            // back at the opening quote position.
            string str = scanString();
            curToks.Add(new LexTokenLiteral(line, col - 1, TokenKindLiteral.LiteralString, str));
        }
        else if (ch == '\r' && nextChar == '\n')
        {
            // CRLF: consume both characters, close out the current line.
            Next();
            Next();
            AddLineToken();
            curToks = new LineTokenCollection();
            ScanNewLine();
        }
        else if (ch == '\n' || ch == '\r')
        {
            // Lone LF or CR: consume one character, close out the current line.
            Next();
            AddLineToken();
            curToks = new LineTokenCollection();
            ScanNewLine();
        }
        else if ("0123456789".IndexOf(ch) != -1)
        {
            string str = scanNumber();
            var temp = col;
            if (StringHelper.IsInt(str))
            {
                curToks.Add(new LexTokenLiteral(line, temp, TokenKindLiteral.LiteralInt, str));
            }
            else if (StringHelper.IsFloat(str))
            {
                curToks.Add(new LexTokenLiteral(line, temp, TokenKindLiteral.LiteralFloat, str));
            }
            else
            {
                lexError(str + "不是正确的数字");
            }
        }
        else if (ch == '+')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.ADD));
            Next();
        }
        else if (ch == '-')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.SUB));
            Next();
        }
        else if (ch == '=' && nextChar == '=')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.EQ));
            Next();
            Next();
        }
        else if (ch == '=' && nextChar == '>')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.AssignTo));
            Next();
            Next();
        }
        else if (ch == '=')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.Assign));
            Next();
        }
        else if (ch == '*')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.MUL));
            Next();
        }
        else if (ch == ',' || ch == ',')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.Comma));
            Next();
        }
        else if (ch == ';' || ch == ';')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.Semi));
            Next();
        }
        else if (ch == '(' || ch == '(')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.LBS));
            Next();
        }
        else if (ch == ')' || ch == ')')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.RBS));
            Next();
        }
        else if (ch == '>' && nextChar == '=')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.GE));
            Next();
            Next();
        }
        else if (ch == '>')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.GT));
            Next();
        }
        else if (ch == '<' && nextChar == '=')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.LE));
            Next();
            Next();
        }
        else if (ch == '<')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.LT));
            Next();
        }
        // BUG FIX: the original tested nextChar for BOTH '!' and '=', which can
        // never be true simultaneously, so "!=" was never tokenized. The first
        // group must test the current character.
        else if ((ch == '!' || ch == '!') && (nextChar == '=' || nextChar == '='))
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.NE));
            Next();
            Next();
        }
        else if (ch == ':' || ch == ':')
        {
            curToks.Add(new LexTokenSymbol(line, col, TokenKindSymbol.Colon));
            Next();
        }
        else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ChineseHelper.IsChineseLetter(ch))
        {
            LexToken t1 = scanIdentToken();
            // "说明:" introduces an explanation comment — skip the rest of the line.
            if (t1.Text == "说明" && (ch == ':' || ch == ':'))
            {
                SkipSingleLineComment();
                continue;
            }
            addIdentOrKey(t1);
        }
        else if (char.IsControl(ch))
        {
            // Swallow runs of control characters, still tracking line breaks
            // (13 == '\r') so line/col bookkeeping stays correct.
            while (char.IsControl(ch) && ch != END)
            {
                Next();
                if ((int)ch == 13)
                {
                    ScanNewLine();
                }
            }
        }
        else
        {
            lexError("无法识别" + (int)ch + ": '" + ch + "' ");
            Next();
        }
    }

    // Flush a trailing line that was not terminated by a newline.
    if (curToks != null && curToks.Count > 0)
    {
        AddLineToken();
    }
    return lineTokens;
}
public Tokenizer(SourceReader sr) { reader = sr; line = 1; col = 1; }
/// <summary>
/// Tokenizes the entire source into a flat token list (newlines are emitted as
/// tokens). Skips whitespace and comments and recognizes both ASCII and
/// full-width (CJK) punctuation variants.
/// </summary>
/// <param name="sr">Reader supplying the source characters.</param>
/// <param name="fileContext">File context passed through to <c>Reset</c>.</param>
/// <returns>The accumulated token list (<c>tokenList</c>).</returns>
public List<Token> Scan(SourceReader sr, ContextFile fileContext)
{
    Reset(sr, fileContext);
    tokenList.Clear();

    while (ch != END)
    {
        // One-character lookahead; GetNext() appears to be a pure peek
        // (it is reused freely within an iteration) — TODO confirm.
        char nextChar = GetNext();

        if (ch == ' ' || ch == '\t')
        {
            SkipWhiteSpace();
        }
        else if (ch == '/' && nextChar == '/')
        {
            SkipSingleLineComment();
        }
        else if (ch == '/' && nextChar == '*')
        {
            SkipMutilLineComment();
        }
        else if (ch == '/')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.DIV });
            Next();
        }
        else if (ch == '"' || ch == '“' || ch == '”')
        {
            // scanString() consumes through the closing quote; col - 1 points
            // back at the opening quote position.
            string str = scanString();
            tokenList.Add(new Token() { Col = col - 1, Line = line, Text = str, Kind = TokenKind.LiteralString });
        }
        else if (ch == '\r' && nextChar == '\n')
        {
            // CRLF: ScanNewLine(2) builds the newline token; the extra Next()
            // consumes the second character of the pair.
            Token tok = ScanNewLine(2);
            Next();
            tokenList.Add(tok);
        }
        else if (ch == '\n' || ch == '\r')
        {
            Token tok = ScanNewLine(1);
            tokenList.Add(tok);
        }
        else if ("0123456789".IndexOf(ch) != -1)
        {
            string str = scanNumber();
            var temp = col;
            if (StringHelper.IsInt(str))
            {
                tokenList.Add(new Token() { Col = temp, Line = line, Text = str, Kind = TokenKind.LiteralInt });
            }
            else if (StringHelper.IsFloat(str))
            {
                tokenList.Add(new Token() { Col = temp, Line = line, Text = str, Kind = TokenKind.LiteralFloat });
            }
            else
            {
                lexError(str + "不是正确的数字");
            }
        }
        else if (ch == '+' || ch == '+')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.ADD });
            Next();
        }
        else if (ch == '-' || ch == '-')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.SUB });
            Next();
        }
        else if ((ch == '=' || ch == '=') && (nextChar == '=' || nextChar == '='))
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.EQ });
            Next();
            Next();
        }
        else if ((ch == '=' || ch == '=') && (nextChar == '>'))
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.AssignTo });
            Next();
            Next();
        }
        else if (ch == '=' || ch == '=')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.Assign });
            Next();
        }
        else if (ch == '*')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.MUL });
            Next();
        }
        else if (ch == ',' || ch == ',')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.Comma });
            Next();
        }
        else if (ch == ';' || ch == ';')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.Semi });
            Next();
        }
        else if (ch == '(' || ch == '(')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.LBS });
            Next();
        }
        else if (ch == ')' || ch == ')')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.RBS });
            Next();
        }
        else if (ch == '>' && nextChar == '=')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.GE });
            Next();
            Next();
        }
        else if (ch == '>')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.GT });
            Next();
        }
        else if (ch == '<' && nextChar == '=')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.LE });
            Next();
            Next();
        }
        else if (ch == '<')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.LT });
            Next();
        }
        // BUG FIX: the original tested nextChar for BOTH '!' and '=', which can
        // never be true simultaneously, so "!=" was never tokenized. The first
        // group must test the current character.
        else if ((ch == '!' || ch == '!') && (nextChar == '=' || nextChar == '='))
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.NE });
            Next();
            Next();
        }
        else if (ch == ':' || ch == ':')
        {
            tokenList.Add(new Token() { Col = col, Line = line, Kind = TokenKind.Colon });
            Next();
        }
        else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ChineseHelper.IsChineseLetter(ch))
        {
            Token t1 = scanKeyIdent();
            // "说明:" introduces an explanation comment — skip the rest of the line.
            if (t1.GetText() == "说明" && (ch == ':' || ch == ':'))
            {
                SkipSingleLineComment();
                continue;
            }
            addIdentOrKey(t1);
        }
        else if (char.IsControl(ch))
        {
            // Swallow runs of control characters, still tracking line breaks
            // (13 == '\r') so line/col bookkeeping stays correct.
            while (char.IsControl(ch) && ch != END)
            {
                Next();
                if ((int)ch == 13)
                {
                    line++;
                    col = 1;
                }
            }
        }
        else
        {
            lexError("无法识别" + (int)ch + ": '" + ch + "' ");
            Next();
        }
    }
    return tokenList;
}