/// <summary>
/// Widens the stored start/end range, when necessary, so that it also
/// covers the span of the given token.
/// </summary>
/// <param name="token">Token whose StartPos and EndPos are merged into the range.</param>
public void UpdateRange(Token token)
{
    // Pull the start of the range back if the token begins earlier.
    int tokenStart = token.StartPos;
    if (tokenStart < startpos)
    {
        startpos = tokenStart;
    }

    // Push the end of the range forward if the token finishes later.
    int tokenEnd = token.EndPos;
    if (tokenEnd > endpos)
    {
        endpos = tokenEnd;
    }
}
/// <summary>
/// Peeks at the next non-skipped token without moving the scanner's own
/// StartPos/EndPos. Of the candidate patterns that match at the current
/// position, the longest match wins; equal-length matches are resolved in
/// favour of the lower TokenType value.
/// </summary>
/// <param name="expectedtokens">
/// Token types to try. When empty, every type in Tokens is tried; otherwise the
/// given types plus everything in SkipList are tried.
/// </param>
/// <returns>The token found. It is also stored in LookAheadToken.</returns>
public Token LookAhead(params TokenType[] expectedtokens)
{
    // A previously computed lookahead with a definite type is reused as-is,
    // which avoids scanning and matching the same input twice.
    if (LookAheadToken != null
        && LookAheadToken.Type != TokenType._UNDETERMINED_
        && LookAheadToken.Type != TokenType._NONE_)
    {
        return LookAheadToken;
    }

    // With no explicit expectations, scan for everything (backward compatible).
    List<TokenType> candidates;
    if (expectedtokens.Length != 0)
    {
        candidates = new List<TokenType>(expectedtokens);
        candidates.AddRange(SkipList);
    }
    else
    {
        candidates = Tokens;
    }

    // Working copies: the scanner's own position is left untouched by a lookahead.
    int scanStart = StartPos;
    int scanEnd = EndPos;
    int lineNumber = CurrentLine;
    string fileName = CurrentFile;

    Token tok;
    bool wasSkipped;

    do
    {
        int bestLength = -1;
        TokenType bestType = (TokenType)int.MaxValue;
        string remaining = Input.Substring(scanStart);

        tok = new Token(scanStart, scanEnd);

        foreach (TokenType candidate in candidates)
        {
            Match m = Patterns[candidate].Match(remaining);

            // Only matches anchored at the current position are of interest.
            if (!m.Success || m.Index != 0)
            {
                continue;
            }

            bool isLonger = m.Length > bestLength;
            bool isTieWithLowerType = candidate < bestType && m.Length == bestLength;
            if (isLonger || isTieWithLowerType)
            {
                bestLength = m.Length;
                bestType = candidate;
            }
        }

        if (bestType >= 0 && bestLength >= 0)
        {
            tok.EndPos = scanStart + bestLength;
            tok.Text = Input.Substring(tok.StartPos, bestLength);
            tok.Type = bestType;
        }
        else if (tok.StartPos == tok.EndPos)
        {
            // Nothing matched: report the offending character, or "EOF" at end of input.
            tok.Text = tok.StartPos < Input.Length
                ? Input.Substring(tok.StartPos, 1)
                : "EOF";
        }

        // Record file, line and column on the token for error reporting.
        tok.File = fileName;
        tok.Line = lineNumber;
        if (tok.StartPos < Input.Length)
        {
            tok.Column = tok.StartPos - Input.LastIndexOf('\n', tok.StartPos);
        }

        wasSkipped = SkipList.Contains(tok.Type);
        if (wasSkipped)
        {
            // Step over the skipped token, counting the newlines it contains,
            // and remember it so it can be attached to the next real token.
            scanStart = tok.EndPos;
            scanEnd = tok.EndPos;
            lineNumber = tok.Line + (tok.Text.Length - tok.Text.Replace("\n", "").Length);
            fileName = tok.File;
            Skipped.Add(tok);
        }
        else
        {
            // Only a non-skipped token receives the skips gathered so far;
            // the gathering list then starts over.
            tok.Skipped = Skipped;
            Skipped = new List<Token>();
        }

        // A FileAndLine token may override the file name and line number
        // used for the tokens that follow it.
        if (tok.Type == FileAndLine)
        {
            var match = Patterns[tok.Type].Match(tok.Text);

            var fileMatch = match.Groups["File"];
            if (fileMatch.Success)
            {
                fileName = fileMatch.Value;
            }

            var lineMatch = match.Groups["Line"];
            if (lineMatch.Success)
            {
                lineNumber = int.Parse(lineMatch.Value);
            }
        }
    }
    while (wasSkipped);

    LookAheadToken = tok;
    return tok;
}
/// <summary>
/// Consumes the next token: obtains it through LookAhead, clears the cached
/// lookahead, and moves the scanner state to just past that token.
/// </summary>
/// <param name="expectedtokens">Token types forwarded unchanged to LookAhead.</param>
/// <returns>The token that was consumed.</returns>
public Token Scan(params TokenType[] expectedtokens)
{
    // Temporarily retrieve the lookahead token.
    Token next = LookAhead(expectedtokens);

    // Clear the cache so that the following call scans afresh.
    LookAheadToken = null;

    // Both positions move to the end of the consumed token.
    int resumeAt = next.EndPos;
    StartPos = resumeAt;
    EndPos = resumeAt;

    // Advance the line counter by the number of '\n' characters in the token text.
    string text = next.Text;
    int newlineCount = text.Length - text.Replace("\n", "").Length;
    CurrentLine = next.Line + newlineCount;
    CurrentFile = next.File;

    return next;
}
/// <summary>
/// Prepares the scanner to read a new input string from its beginning.
/// </summary>
/// <param name="input">Text to be scanned.</param>
/// <param name="fileName">Name stored in CurrentFile and reported on tokens.</param>
public void Init(string input, string fileName)
{
    Input = input;

    // Scan window starts empty at offset zero.
    StartPos = 0;
    EndPos = 0;

    // Location bookkeeping: lines and columns start at 1, position at 0.
    CurrentFile = fileName;
    CurrentLine = 1;
    CurrentColumn = 1;
    CurrentPosition = 0;

    // Discard any lookahead cached from earlier input.
    // NOTE(review): Skipped is not reset here - confirm that carrying skipped
    // tokens over from a previous input is intended.
    LookAheadToken = null;
}
/// <summary>
/// Creates a token of the requested type spanning the scanner's current
/// StartPos and EndPos.
/// </summary>
/// <param name="type">Type assigned to the new token.</param>
/// <returns>The newly created token.</returns>
public Token GetToken(TokenType type)
{
    return new Token(StartPos, EndPos) { Type = type };
}
/// <summary>
/// Builds the scanner's token tables: registers one regular expression per
/// token type in Patterns, records the registration order in Tokens, and marks
/// WHITESPACE and COMMENTLINE as skippable.
/// </summary>
public Scanner()
{
    Patterns = new Dictionary<TokenType, Regex>();
    Tokens = new List<TokenType>();
    LookAheadToken = null;
    Skipped = new List<Token>();

    // Token types that are scanned but not handed out as real tokens.
    SkipList = new List<TokenType> { TokenType.WHITESPACE, TokenType.COMMENTLINE };

    // Token type -> pattern text, in registration order.
    var definitions = new[]
    {
        // Operators
        new KeyValuePair<TokenType, string>(TokenType.PLUSMINUS, @"(\+|-)"),
        new KeyValuePair<TokenType, string>(TokenType.MULT, @"\*"),
        new KeyValuePair<TokenType, string>(TokenType.DIV, @"/"),
        new KeyValuePair<TokenType, string>(TokenType.POWER, @"\^"),
        new KeyValuePair<TokenType, string>(TokenType.E, @"(?i)\be\b"),
        new KeyValuePair<TokenType, string>(TokenType.NOT, @"(?i)\bnot\b"),
        new KeyValuePair<TokenType, string>(TokenType.AND, @"(?i)\band\b"),
        new KeyValuePair<TokenType, string>(TokenType.OR, @"(?i)\bor\b"),
        new KeyValuePair<TokenType, string>(TokenType.TRUEFALSE, @"(?i)\btrue\b|\bfalse\b"),
        new KeyValuePair<TokenType, string>(TokenType.COMPARATOR, @"<>|>=|<=|=|>|<"),

        // Keywords
        new KeyValuePair<TokenType, string>(TokenType.SET, @"(?i)\bset\b"),
        new KeyValuePair<TokenType, string>(TokenType.TO, @"(?i)\bto\b"),
        new KeyValuePair<TokenType, string>(TokenType.IS, @"(?i)\bis\b"),
        new KeyValuePair<TokenType, string>(TokenType.IF, @"(?i)\bif\b"),
        new KeyValuePair<TokenType, string>(TokenType.ELSE, @"(?i)\belse\b"),
        new KeyValuePair<TokenType, string>(TokenType.UNTIL, @"(?i)\buntil\b"),
        new KeyValuePair<TokenType, string>(TokenType.STEP, @"(?i)\bstep\b"),
        new KeyValuePair<TokenType, string>(TokenType.DO, @"(?i)\bdo\b"),
        new KeyValuePair<TokenType, string>(TokenType.LOCK, @"(?i)\block\b"),
        new KeyValuePair<TokenType, string>(TokenType.UNLOCK, @"(?i)\bunlock\b"),
        new KeyValuePair<TokenType, string>(TokenType.PRINT, @"(?i)\bprint\b"),
        new KeyValuePair<TokenType, string>(TokenType.AT, @"(?i)\bat\b"),
        new KeyValuePair<TokenType, string>(TokenType.ON, @"(?i)\bon\b"),
        new KeyValuePair<TokenType, string>(TokenType.TOGGLE, @"(?i)\btoggle\b"),
        new KeyValuePair<TokenType, string>(TokenType.WAIT, @"(?i)\bwait\b"),
        new KeyValuePair<TokenType, string>(TokenType.WHEN, @"(?i)\bwhen\b"),
        new KeyValuePair<TokenType, string>(TokenType.THEN, @"(?i)\bthen\b"),
        new KeyValuePair<TokenType, string>(TokenType.OFF, @"(?i)\boff\b"),
        new KeyValuePair<TokenType, string>(TokenType.STAGE, @"(?i)\bstage\b"),
        new KeyValuePair<TokenType, string>(TokenType.CLEARSCREEN, @"(?i)\bclearscreen\b"),
        new KeyValuePair<TokenType, string>(TokenType.ADD, @"(?i)\badd\b"),
        new KeyValuePair<TokenType, string>(TokenType.REMOVE, @"(?i)\bremove\b"),
        new KeyValuePair<TokenType, string>(TokenType.LOG, @"(?i)\blog\b"),
        new KeyValuePair<TokenType, string>(TokenType.BREAK, @"(?i)\bbreak\b"),
        new KeyValuePair<TokenType, string>(TokenType.PRESERVE, @"(?i)\bpreserve\b"),
        new KeyValuePair<TokenType, string>(TokenType.DECLARE, @"(?i)\bdeclare\b"),
        new KeyValuePair<TokenType, string>(TokenType.LOCAL, @"(?i)\blocal\b"),
        new KeyValuePair<TokenType, string>(TokenType.GLOBAL, @"(?i)\bglobal\b"),
        new KeyValuePair<TokenType, string>(TokenType.PARAMETER, @"(?i)\bparameter\b"),
        new KeyValuePair<TokenType, string>(TokenType.FUNCTION, @"(?i)\bfunction\b"),
        new KeyValuePair<TokenType, string>(TokenType.RETURN, @"(?i)\breturn\b"),
        new KeyValuePair<TokenType, string>(TokenType.SWITCH, @"(?i)\bswitch\b"),
        new KeyValuePair<TokenType, string>(TokenType.COPY, @"(?i)\bcopy\b"),
        new KeyValuePair<TokenType, string>(TokenType.FROM, @"(?i)\bfrom\b"),
        new KeyValuePair<TokenType, string>(TokenType.RENAME, @"(?i)\brename\b"),
        new KeyValuePair<TokenType, string>(TokenType.VOLUME, @"(?i)\bvolume\b"),
        new KeyValuePair<TokenType, string>(TokenType.FILE, @"(?i)\bfile\b"),
        new KeyValuePair<TokenType, string>(TokenType.DELETE, @"(?i)\bdelete\b"),
        new KeyValuePair<TokenType, string>(TokenType.EDIT, @"(?i)\bedit\b"),
        new KeyValuePair<TokenType, string>(TokenType.RUN, @"(?i)\brun\b"),
        new KeyValuePair<TokenType, string>(TokenType.COMPILE, @"(?i)\bcompile\b"),
        new KeyValuePair<TokenType, string>(TokenType.LIST, @"(?i)\blist\b"),
        new KeyValuePair<TokenType, string>(TokenType.REBOOT, @"(?i)\breboot\b"),
        new KeyValuePair<TokenType, string>(TokenType.SHUTDOWN, @"(?i)\bshutdown\b"),
        new KeyValuePair<TokenType, string>(TokenType.FOR, @"(?i)\bfor\b"),
        new KeyValuePair<TokenType, string>(TokenType.UNSET, @"(?i)\bunset\b"),

        // Brackets and punctuation
        new KeyValuePair<TokenType, string>(TokenType.BRACKETOPEN, @"\("),
        new KeyValuePair<TokenType, string>(TokenType.BRACKETCLOSE, @"\)"),
        new KeyValuePair<TokenType, string>(TokenType.CURLYOPEN, @"\{"),
        new KeyValuePair<TokenType, string>(TokenType.CURLYCLOSE, @"\}"),
        new KeyValuePair<TokenType, string>(TokenType.SQUAREOPEN, @"\["),
        new KeyValuePair<TokenType, string>(TokenType.SQUARECLOSE, @"\]"),
        new KeyValuePair<TokenType, string>(TokenType.COMMA, @","),
        new KeyValuePair<TokenType, string>(TokenType.COLON, @":"),
        new KeyValuePair<TokenType, string>(TokenType.IN, @"(?i)\bin\b"),
        new KeyValuePair<TokenType, string>(TokenType.ARRAYINDEX, @"#"),
        new KeyValuePair<TokenType, string>(TokenType.ALL, @"(?i)\ball\b"),

        // Identifiers and literals
        new KeyValuePair<TokenType, string>(TokenType.IDENTIFIER, @"(?i)[a-z_][a-z0-9_]*"),
        new KeyValuePair<TokenType, string>(TokenType.FILEIDENT, @"(?i)[a-z_][a-z0-9_]*(\.[a-z0-9_][a-z0-9_]*)*"),
        new KeyValuePair<TokenType, string>(TokenType.INTEGER, @"[0-9]+"),
        new KeyValuePair<TokenType, string>(TokenType.DOUBLE, @"[0-9]*\.[0-9]+"),
        new KeyValuePair<TokenType, string>(TokenType.STRING, @"@?\""(\""\""|[^\""])*\"""),
        new KeyValuePair<TokenType, string>(TokenType.EOI, @"\."),
        new KeyValuePair<TokenType, string>(TokenType.ATSIGN, @"@"),
        new KeyValuePair<TokenType, string>(TokenType.LAZYGLOBAL, @"(?i)\blazyglobal\b"),

        // End of input and skippable tokens
        new KeyValuePair<TokenType, string>(TokenType.EOF, @"^$"),
        new KeyValuePair<TokenType, string>(TokenType.WHITESPACE, @"\s+"),
        new KeyValuePair<TokenType, string>(TokenType.COMMENTLINE, @"//[^\n]*\n?"),
    };

    // Register every definition: compiled pattern into Patterns, type into Tokens.
    foreach (var definition in definitions)
    {
        Patterns.Add(definition.Key, new Regex(definition.Value));
        Tokens.Add(definition.Key);
    }
}
/// <summary>
/// Rewrites, in place, the parse branch of a FROM loop into a simpler unrolled form.<br/>
/// Source of the shape:<br/>
/// <br/>
/// FROM {(init statements)} UNTIL expr STEP {(inc statements)} DO {(body statements)} <br/>
/// <br/>
/// ends up with the child nodes of this rule arranged as if the source had been:<br/>
/// <br/>
/// { (init statements) UNTIL expr { (body statements) (inc statements) } }<br/>
/// <br/>
/// Because the until loop lives inside the init block, anything declared by the init
/// statements is in scope for the loop body.<br/>
/// This method only rearranges nodes; the until_stmt node it creates is presumably
/// compiled later by whatever visits until statements.<br/>
/// </summary>
/// <param name="node">The FROM loop node whose children are rearranged.</param>
private void RearrangeLoopFromNode(ParseNode node)
{
    // Guard against being invoked twice on the same node: once rearranged, the
    // node holds exactly one child, an instruction block.
    bool alreadyRearranged =
        node.Nodes.Count == 1 &&
        node.Nodes[0].Token.Type == TokenType.instruction_block;
    if (alreadyRearranged)
    {
        return;
    }

    ParseNode initBlock = null;
    ParseNode checkExpression = null;
    ParseNode untilTokenNode = null;
    ParseNode stepBlock = null;
    ParseNode doBlock = null;

    // The children are walked as (keyword, operand) pairs.
    for (int i = 0; i < node.Nodes.Count - 1; i += 2)
    {
        ParseNode keyword = node.Nodes[i];
        ParseNode operand = node.Nodes[i + 1];
        TokenType keywordType = keyword.Token.Type;

        if (keywordType == TokenType.FROM)
        {
            initBlock = operand;
        }
        else if (keywordType == TokenType.UNTIL)
        {
            untilTokenNode = keyword;
            checkExpression = operand;
        }
        else if (keywordType == TokenType.STEP)
        {
            stepBlock = operand;
        }
        else if (keywordType == TokenType.DO)
        {
            doBlock = operand;
        }
        // Any other keyword is ignored here; it would be a syntax error and is not
        // expected to reach this method.
    }

    // Defensive checks; the parser is expected to have rejected such input already.
    if (initBlock == null)
    {
        throw new KOSCompileException("Missing FROM block in FROM loop.");
    }
    if (checkExpression == null || untilTokenNode == null)
    {
        throw new KOSCompileException("Missing UNTIL check expression in FROM loop.");
    }
    if (stepBlock == null)
    {
        throw new KOSCompileException("Missing STEP block in FROM loop.");
    }
    if (doBlock == null)
    {
        throw new KOSCompileException("Missing loop body (DO block) in FROM loop.");
    }

    // The step instructions go on the tail end of the body block's instructions.
    // NOTE(review): the moved children keep whatever Parent they already had -
    // confirm that nothing downstream expects Parent to be the body block.
    foreach (ParseNode stepChild in stepBlock.Nodes)
    {
        doBlock.Nodes.Add(stepChild);
    }

    // Synthesize an until statement located where the UNTIL keyword was.
    Token untilKeyword = untilTokenNode.Token;
    var untilStatementTok = new Token
    {
        Type = TokenType.until_stmt,
        Line = untilKeyword.Line,
        Column = untilKeyword.Column,
        File = untilKeyword.File
    };
    ParseNode untilNode = initBlock.CreateNode(untilStatementTok, untilStatementTok.ToString());

    // Move the existing pieces under the new until node. Parent is set directly
    // because only CreateNode(), which makes a brand new node, is used here for
    // attaching; no call that re-attaches an existing node is used.
    foreach (ParseNode part in new[] { untilTokenNode, checkExpression, doBlock })
    {
        untilNode.Nodes.Add(part);
        part.Parent = untilNode;
    }

    // The until loop becomes the last statement of the init block. Its Parent is
    // not set here; the original note says initBlock.CreateNode() already did so.
    initBlock.Nodes.Add(untilNode);

    // The init block now holds the whole loop, so it becomes this node's only child.
    // Its Parent is left untouched (the original note says it already points at node).
    node.Nodes.Clear();
    node.Nodes.Add(initBlock);
}
/// <summary>
/// Creates a compile exception located at the line and column of the given token.
/// </summary>
/// <param name="token">Token whose Line and Column supply the error location.</param>
/// <param name="message">Message passed on to the LineCol-based constructor.</param>
public KOSCompileException(Token token, string message)
    : this(new LineCol(token.Line, token.Column), message)
{
}