/// <summary>
/// Smoke test: registers operator, number and whitespace definitions on a fresh
/// <c>Lexer</c> and tokenizes a small arithmetic expression.
/// </summary>
public void LexerTest()
{
    const string expression = "1 * 2 / 3 + 4 - 5";

    var lexer = new Lexer();

    // Arithmetic operators and integer runs are both registered under ETokenType.Invalid.
    var operatorDefinition = new TokenDefinition(ETokenType.Invalid, new Regex(@"\*|\/|\+|\-"));
    lexer.AddDefinition(operatorDefinition);

    var numberDefinition = new TokenDefinition(ETokenType.Invalid, new Regex(@"\d+"));
    lexer.AddDefinition(numberDefinition);

    // NOTE(review): the third argument (true) presumably marks the definition as
    // ignored/skipped - confirm against the TokenDefinition constructor.
    var whiteSpaceDefinition = new TokenDefinition(ETokenType.WhiteSpace, new Regex(@"\s+"), true);
    lexer.AddDefinition(whiteSpaceDefinition);

    var tokens = lexer.Tokenize(expression);

    // NOTE(review): this assertion compares two constants, so it can never fail and
    // 'tokens' is never inspected. It should assert on the tokenizer output once the
    // expected token count for this input is confirmed.
    Assert.AreEqual(17, 17);
}
/// Author: Max Hamulyak
/// Date:   20-06-2015
/// <summary>
/// Builds the lexer used to tokenize source text written in the Python-style
/// language: registers one <c>TokenDefinition</c> per token kind, in the order
/// listed below. Also assigns the <c>forLoopRegex</c> member with the full
/// "for x in range(a,b)" pattern.
/// </summary>
/// <returns>The language lexer.</returns>
protected override ILexer BuildLanguageLexer()
{
    ILexer lexer = new Lexer ();

    // Line structure and whitespace.
    Regex endOfLineRegex = new Regex (@"\r\n|\r|\n", RegexOptions.Compiled);
    lexer.AddDefinition (new TokenDefinition (ETokenType.EOL, endOfLineRegex));
    lexer.AddDefinition (new TokenDefinition (ETokenType.WhiteSpace, new Regex (@"[ \t]")));

    // Function declarations, built-in commands and function calls.
    lexer.AddDefinition (new TokenDefinition (ETokenType.FUNCTIONDeclaration,
        new Regex (@"def[ \s](?<functionName>[a-z]+[A-Za-z0-9]*)\(\)", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Command,
        new Regex (@"(moveForward\(\)|rotateRight\(\)|pickUp\('([^)]+)\))")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FUNCTIONCall,
        new Regex (@"(?<functionName>[a-z]+[A-Za-z0-9]*)\(\)", RegexOptions.Multiline)));

    lexer.AddDefinition (new TokenDefinition (ETokenType.PythonRange,
        new Regex (@"range\(((\d+),(\d+))\)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.KeyWord, new Regex (@"(in)")));

    // '#' starts a comment that runs to the end of the line.
    Regex commentRegex = new Regex (@"\#(.*)", RegexOptions.Multiline);
    lexer.AddDefinition (new TokenDefinition (ETokenType.CommentLine, commentRegex));

    // These two calls are registered under ETokenType.Invalid.
    lexer.AddDefinition (new TokenDefinition (ETokenType.Invalid,
        new Regex (@"(moveBackward\(\)|rotateLeft\(\))")));

    #region PythonBlockSyntax
    // Full for-loop header; named groups expose the loop index and both range bounds.
    forLoopRegex = new Regex (
        @"for[ \t](?<loopIndex>[a-z]{1}[a-zA-Z]*)[ \t]*[ \t]*in[ \t]*range\((?<startingLow>[0-9]+),(?<endingHigh>[0-9]+)\)",
        RegexOptions.Multiline);
    lexer.AddDefinition (new TokenDefinition (ETokenType.FOR, forLoopRegex));
    lexer.AddDefinition (new TokenDefinition (ETokenType.WHILE,
        new Regex (@"\bwhile\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FOR,
        new Regex (@"\bfor\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.IF,
        new Regex (@"\bif\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.ELSEIF,
        new Regex (@"\belif\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.ELSE,
        new Regex (@"\belse\b", RegexOptions.Multiline)));
    #endregion

    lexer.AddDefinition (new TokenDefinition (ETokenType.LogicalOperator, new Regex (@"(or|and|not)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.startBlock, new Regex (@"(:)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.logicInstruction,
        new Regex (@"(left|right|forward|backward|at\('([^)]+)\))")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.leftParentheses, new Regex (@"[\(]")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.rightParentheses, new Regex (@"[\)]")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Operator, new Regex (@"(\+|\-|\*|\/)")));

    // Fixed: the second alternative used to be "/>=" (forward slash), which only matched
    // the literal text "/>=" and therefore never recognised ">=" as one operator.
    lexer.AddDefinition (new TokenDefinition (ETokenType.ComparisonOperator,
        new Regex (@"(\<=|\>=|\<|\>|==|\!=)")));

    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"(True|False)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.VARIABLE, new Regex (@"([A-Za-z]+)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.AssignmentOperator, new Regex ("(=)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"(\d+)")));

    // Double-quoted string literal, i.e. the pattern \"[^\"]*\" written as a verbatim string.
    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"\""[^\""]*\""")));

    // Fixed: the closing \b used to sit inside the group ("\b(a|b|c\b)"), so it only
    // applied to the last keyword. It now follows the group and guards every alternative.
    string keywordAlternation = string.Join ("|", languageKeywords.Select (Regex.Escape).ToArray ());
    lexer.AddDefinition (new TokenDefinition (ETokenType.KeyWord,
        new Regex (@"\b(" + keywordAlternation + @")\b", RegexOptions.Multiline)));

    return lexer;
}
/// Author: Max Hamulyak
/// Date:   20-06-2015
/// <summary>
/// Builds the lexer used to tokenize source text written in the Pascal-style
/// language: registers one <c>TokenDefinition</c> per token kind, in the order
/// listed below. Also assigns the <c>forLoopRegex</c> and
/// <c>varDeclarationRegex</c> members.
/// </summary>
/// <returns>The language lexer.</returns>
protected override ILexer BuildLanguageLexer()
{
    ILexer lexer = new Lexer ();

    // Full for-loop header: "for idx := low to|downto high do".
    // Fixed: the '+' quantifiers used to sit outside the startingLow / endingHigh groups
    // ("(?<startingLow>[0-9])+"), so each group's value held only the last digit of the
    // bound. The set of matched strings is unchanged.
    // NOTE(review): loopIndex requires at least two letters ([a-z]{1}[a-zA-Z]+), so a
    // single-letter index such as "i" does not match - confirm this is intended.
    forLoopRegex = new Regex (
        @"for[ \t](?<loopIndex>[a-z]{1}[a-zA-Z]+)[ \t]*\:\=[ \t]*(?<startingLow>[0-9]+)[ \t](?<direction>to|downto)[ \t](?<endingHigh>[0-9]+)[ \t]do",
        RegexOptions.Multiline);

    // Whole-line variable declaration: "var name: integer|string|boolean;".
    varDeclarationRegex = new Regex (
        @"^var[ \s](?<variableName>[A-Za-z]+)\:[ \t](?<variableType>integer|string|boolean);$",
        RegexOptions.Multiline);

    lexer.AddDefinition (new TokenDefinition (ETokenType.AssignmentOperator, new Regex ("(:=)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.startBlock, new Regex (";|:")));

    // Comments are enclosed in curly braces.
    Regex commentRegex = new Regex (@"\{(.*)\}", RegexOptions.Multiline);
    lexer.AddDefinition (new TokenDefinition (ETokenType.CommentLine, commentRegex));

    lexer.AddDefinition (new TokenDefinition (ETokenType.ELSEIF,
        new Regex (@"(\belse[ \t]if\b)", RegexOptions.Multiline)));

    // Line structure and whitespace.
    Regex endOfLineRegex = new Regex (@"\r\n|\r|\n", RegexOptions.Compiled);
    lexer.AddDefinition (new TokenDefinition (ETokenType.EOL, endOfLineRegex));
    lexer.AddDefinition (new TokenDefinition (ETokenType.WhiteSpace, new Regex (@"[ \t]")));

    lexer.AddDefinition (new TokenDefinition (ETokenType.Invalid,
        new Regex (@"([#{}])", RegexOptions.Multiline)));

    // Fixed: the second alternative used an unescaped '.', which matches any character,
    // so "end" followed by e.g. a space was consumed as a single token. It now matches
    // only the literal "end.".
    lexer.AddDefinition (new TokenDefinition (ETokenType.endBlock,
        new Regex (@"(\bend\b;|\bend\b\.|\bend\b)", RegexOptions.Multiline)));

    lexer.AddDefinition (new TokenDefinition (ETokenType.WHILE,
        new Regex (@"\bwhile\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FOR, forLoopRegex));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FOR,
        new Regex (@"\bfor\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.IF,
        new Regex (@"\bif\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.ELSE,
        new Regex (@"\belse\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.VARIABLE, varDeclarationRegex));

    // Built-in commands, procedure declarations and calls.
    lexer.AddDefinition (new TokenDefinition (ETokenType.Command,
        new Regex (@"(moveForward\(\)|rotateRight\(\)|pickUp\('([^)]+)\))")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FUNCTIONDeclaration,
        new Regex (@"procedure[ \s](?<functionName>[a-z]+[A-Za-z0-9]*)\(\)", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.FUNCTIONCall,
        new Regex (@"(?<functionName>[a-z]+[A-Za-z0-9]*)\(\)", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.logicInstruction,
        new Regex (@"(left|right|forward|backward|at\('([^)]+)\))")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.startBlock,
        new Regex (@"\bbegin\b", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.LogicalOperator, new Regex (@"(or|and|not)")));

    // Fixed: the closing \b used to sit inside the group ("\b(a|b|c\b)"), so it only
    // applied to the last keyword. It now follows the group and guards every alternative.
    string keywordAlternation = string.Join ("|", languageKeywords.Select (Regex.Escape).ToArray ());
    lexer.AddDefinition (new TokenDefinition (ETokenType.KeyWord,
        new Regex (@"\b(" + keywordAlternation + @")\b", RegexOptions.Multiline)));

    lexer.AddDefinition (new TokenDefinition (ETokenType.leftParentheses,
        new Regex (@"\(", RegexOptions.Multiline)));
    lexer.AddDefinition (new TokenDefinition (ETokenType.rightParentheses, new Regex (@"[\)]")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Operator, new Regex (@"(\+|\-|\*|\/)")));

    // Fixed: the second alternative used to be "/>=" (forward slash), which only matched
    // the literal text "/>=" and therefore never recognised ">=" as one operator.
    lexer.AddDefinition (new TokenDefinition (ETokenType.ComparisonOperator,
        new Regex (@"(\<=|\>=|\<|\>|==|\!=)")));

    lexer.AddDefinition (new TokenDefinition (ETokenType.VARIABLE, new Regex (@"([A-Za-z]+)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"(\d+)")));
    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"(True|False)")));

    // Double-quoted string literal, i.e. the pattern \"[^\"]*\" written as a verbatim string.
    lexer.AddDefinition (new TokenDefinition (ETokenType.Literal, new Regex (@"\""[^\""]*\""")));

    lexer.AddDefinition (new TokenDefinition (ETokenType.DataType,
        new Regex (@"(integer|string|boolean)")));

    return lexer;
}