Beispiel #1
0
        /// <summary>
        /// Sets up the token table for the arithmetic grammar: each token type gets
        /// a compiled regular expression in <c>Patterns</c> and an entry in
        /// <c>Tokens</c>, in the order listed below. WHITESPACE is the only
        /// token type put on the skip list.
        /// </summary>
        public Scanner()
        {
            Patterns = new Dictionary<TokenType, Regex>();
            Tokens = new List<TokenType>();
            LookAheadToken = null;

            SkipList = new List<TokenType> { TokenType.WHITESPACE };

            // Operands and operators.
            Patterns.Add(TokenType.NUMBER, new Regex(@"[0-9]+", RegexOptions.Compiled));
            Tokens.Add(TokenType.NUMBER);

            Patterns.Add(TokenType.PLUSMINUS, new Regex(@"(\+|-)", RegexOptions.Compiled));
            Tokens.Add(TokenType.PLUSMINUS);

            Patterns.Add(TokenType.MULTDIV, new Regex(@"\*|/", RegexOptions.Compiled));
            Tokens.Add(TokenType.MULTDIV);

            // Grouping.
            Patterns.Add(TokenType.BROPEN, new Regex(@"\(", RegexOptions.Compiled));
            Tokens.Add(TokenType.BROPEN);

            Patterns.Add(TokenType.BRCLOSE, new Regex(@"\)", RegexOptions.Compiled));
            Tokens.Add(TokenType.BRCLOSE);

            // End of input and skippable whitespace.
            Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
            Tokens.Add(TokenType.EOF);

            Patterns.Add(TokenType.WHITESPACE, new Regex(@"\s+", RegexOptions.Compiled));
            Tokens.Add(TokenType.WHITESPACE);
        }
 /// <summary>
 /// Widens the tracked <c>startpos</c>/<c>endpos</c> range so that it also
 /// covers the span of the given token.
 /// </summary>
 /// <param name="token">Token whose start and end positions are merged into the range.</param>
 public void UpdateRange(Token token)
 {
     // Pull the lower bound down if the token begins earlier.
     if (startpos > token.StartPos)
     {
         startpos = token.StartPos;
     }

     // Push the upper bound up if the token ends later.
     if (endpos < token.EndPos)
     {
         endpos = token.EndPos;
     }
 }
        private List<TokenType> SkipList; // token types that LookAhead skips over (collecting them in Skipped) instead of returning

        /// <summary>
        /// Sets up the token table for the keyword/free-text grammar: each token
        /// type gets a compiled regular expression in <c>Patterns</c> and an entry
        /// in <c>Tokens</c>, in the order listed below. SPACE is the only token
        /// type put on the skip list.
        /// </summary>
        public Scanner()
        {
            Patterns = new Dictionary<TokenType, Regex>();
            Tokens = new List<TokenType>();
            LookAheadToken = null;
            Skipped = new List<Token>();

            SkipList = new List<TokenType> { TokenType.SPACE };

            // Field keywords.
            Patterns.Add(TokenType.IdKeyword1, new Regex(@"(?i)id:", RegexOptions.Compiled));
            Tokens.Add(TokenType.IdKeyword1);

            Patterns.Add(TokenType.IdKeyword2, new Regex(@"#", RegexOptions.Compiled));
            Tokens.Add(TokenType.IdKeyword2);

            Patterns.Add(TokenType.TimeKeyword, new Regex(@"time:", RegexOptions.Compiled));
            Tokens.Add(TokenType.TimeKeyword);

            Patterns.Add(TokenType.StatusKeyword1, new Regex(@"status:", RegexOptions.Compiled));
            Tokens.Add(TokenType.StatusKeyword1);

            Patterns.Add(TokenType.StatusKeyword2, new Regex(@"state:", RegexOptions.Compiled));
            Tokens.Add(TokenType.StatusKeyword2);

            Patterns.Add(TokenType.CommentKeyword1, new Regex(@"comment:", RegexOptions.Compiled));
            Tokens.Add(TokenType.CommentKeyword1);

            Patterns.Add(TokenType.CommentKeyword2, new Regex(@"comments:", RegexOptions.Compiled));
            Tokens.Add(TokenType.CommentKeyword2);

            Patterns.Add(TokenType.CommentKeyword3, new Regex(@"comm:", RegexOptions.Compiled));
            Tokens.Add(TokenType.CommentKeyword3);

            Patterns.Add(TokenType.CommentKeyword4, new Regex(@"cmt:", RegexOptions.Compiled));
            Tokens.Add(TokenType.CommentKeyword4);

            // End of input and skippable whitespace.
            Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
            Tokens.Add(TokenType.EOF);

            Patterns.Add(TokenType.SPACE, new Regex(@"\s+", RegexOptions.Compiled));
            Tokens.Add(TokenType.SPACE);

            // Delimiters and punctuation.
            Patterns.Add(TokenType.Delimiter, new Regex(@":", RegexOptions.Compiled));
            Tokens.Add(TokenType.Delimiter);

            Patterns.Add(TokenType.PUNCTUATIONMARK, new Regex(@"[,\.\?!;]+", RegexOptions.Compiled));
            Tokens.Add(TokenType.PUNCTUATIONMARK);

            Patterns.Add(TokenType.COMMA, new Regex(@",", RegexOptions.Compiled));
            Tokens.Add(TokenType.COMMA);

            // Numeric literals.
            Patterns.Add(TokenType.NUMBER, new Regex(@"[0-9]+", RegexOptions.Compiled));
            Tokens.Add(TokenType.NUMBER);

            Patterns.Add(TokenType.DECIMAL, new Regex(@"\d+([\.,]\d+)?", RegexOptions.Compiled));
            Tokens.Add(TokenType.DECIMAL);

            // Free text: the lookarounds reject '#' and the keyword prefixes above.
            Patterns.Add(TokenType.ANY_TEXT, new Regex(@"(?i)(.(?<!#)(?<!(id|time|status|state|comment|comm|comments|cmt):))+\s", RegexOptions.Compiled));
            Tokens.Add(TokenType.ANY_TEXT);

            Patterns.Add(TokenType.ANY_SYMBOL, new Regex(@"(?i)((?!#)(?!(id|time|status|state|comment|comm|comments|cmt):).)", RegexOptions.Compiled));
            Tokens.Add(TokenType.ANY_SYMBOL);
        }
        /// <summary>
        /// Peeks at the next non-skipped token without moving the scanner's
        /// <c>StartPos</c>/<c>EndPos</c>. Of all candidate token types, the longest
        /// match anchored at the current position wins; matches of equal length
        /// go to the lower <see cref="TokenType"/> value.
        /// </summary>
        /// <param name="expectedtokens">
        /// Token types to try. When empty, every registered token type is tried;
        /// otherwise the skip-list types are tried in addition to the given ones.
        /// </param>
        /// <returns>
        /// The token found at the current position. If no pattern matched, the
        /// token's type is left as the <c>Token</c> constructor set it.
        /// </returns>
        public Token LookAhead(params TokenType[] expectedtokens)
        {
            int position = StartPos;

            // A determined lookahead token is cached, so the same input is not
            // scanned and matched twice.
            bool cacheUsable = LookAheadToken != null
                && LookAheadToken.Type != TokenType._UNDETERMINED_
                && LookAheadToken.Type != TokenType._NONE_;
            if (cacheUsable)
            {
                return LookAheadToken;
            }

            // No explicit candidates means "scan for all of them" (backward compatible).
            List<TokenType> candidates;
            if (expectedtokens.Length != 0)
            {
                candidates = new List<TokenType>(expectedtokens);
                candidates.AddRange(SkipList);
            }
            else
            {
                candidates = Tokens;
            }

            Token current;
            bool isSkipToken;
            do
            {
                int bestLength = -1;
                TokenType bestType = (TokenType)int.MaxValue;
                string remaining = Input.Substring(position);

                current = new Token(position, EndPos);

                foreach (TokenType candidate in candidates)
                {
                    Match match = Patterns[candidate].Match(remaining);
                    if (!match.Success || match.Index != 0)
                    {
                        continue; // only matches anchored at the current position count
                    }

                    bool isLonger = match.Length > bestLength;
                    bool isTieWithLowerType = candidate < bestType && match.Length == bestLength;
                    if (isLonger || isTieWithLowerType)
                    {
                        bestLength = match.Length;
                        bestType = candidate;
                    }
                }

                if (bestType >= 0 && bestLength >= 0)
                {
                    current.EndPos = position + bestLength;
                    current.Text = Input.Substring(current.StartPos, bestLength);
                    current.Type = bestType;
                }
                else if (current.StartPos < current.EndPos - 1)
                {
                    // Nothing matched: expose the single offending character.
                    current.Text = Input.Substring(current.StartPos, 1);
                }

                isSkipToken = SkipList.Contains(current.Type);
                if (isSkipToken)
                {
                    // Step over the skipped token and remember it.
                    position = current.EndPos;
                    Skipped.Add(current);
                }
                else
                {
                    // Hand the skips collected so far to the first real token,
                    // then start a fresh collection.
                    current.Skipped = Skipped;
                    Skipped = new List<Token>();
                }
            }
            while (isSkipToken);

            LookAheadToken = current;
            return current;
        }
  /// <summary>
 /// Fetches the next token via <c>LookAhead</c> and then advances the
 /// scanner past it, so the following call continues after this token.
 /// </summary>
 /// <param name="expectedtokens">Token types passed through to <c>LookAhead</c>.</param>
 /// <returns>The token that was consumed.</returns>
 public Token Scan(params TokenType[] expectedtokens)
 {
     Token consumed = LookAhead(expectedtokens);

     // Clear the cached lookahead so the next call scans fresh input.
     LookAheadToken = null;

     // Move the scan window to just behind the consumed token.
     int resumeAt = consumed.EndPos;
     StartPos = resumeAt;
     EndPos = resumeAt;

     return consumed;
 }
 /// <summary>
 /// Creates a token of the given type spanning the scanner's current
 /// <c>StartPos</c>..<c>EndPos</c> range.
 /// </summary>
 /// <param name="type">Type to stamp on the new token.</param>
 /// <returns>The newly created token.</returns>
 public Token GetToken(TokenType type)
 {
     return new Token(StartPos, EndPos) { Type = type };
 }
 /// <summary>
 /// Points the scanner at a new input string and resets all position
 /// counters and the cached lookahead token.
 /// </summary>
 /// <param name="input">Text to be scanned.</param>
 public void Init(string input)
 {
     Input = input;

     // Scan window back to the very beginning.
     StartPos = 0;
     EndPos = 0;

     // Position bookkeeping starts at zero.
     CurrentLine = 0;
     CurrentColumn = 0;
     CurrentPosition = 0;

     // Any lookahead from a previous input is stale now.
     LookAheadToken = null;
 }
Beispiel #8
0
        /// <summary>
        /// Sets up the token table for the grammar-definition language: each token
        /// type gets a compiled regular expression in <c>Patterns</c> and an entry
        /// in <c>Tokens</c>, in the order listed below. Whitespace, line comments
        /// and block comments are put on the skip list.
        /// </summary>
        public Scanner()
        {
            Patterns = new Dictionary<TokenType, Regex>();
            Tokens = new List<TokenType>();
            LookAheadToken = null;
            Skipped = new List<Token>();

            SkipList = new List<TokenType>
            {
                TokenType.WHITESPACE,
                TokenType.COMMENTLINE,
                TokenType.COMMENTBLOCK
            };

            // Brackets, code blocks and separators.
            Patterns.Add(TokenType.BRACKETOPEN, new Regex(@"\(", RegexOptions.Compiled));
            Tokens.Add(TokenType.BRACKETOPEN);

            Patterns.Add(TokenType.BRACKETCLOSE, new Regex(@"\)", RegexOptions.Compiled));
            Tokens.Add(TokenType.BRACKETCLOSE);

            Patterns.Add(TokenType.CODEBLOCK, new Regex(@"\{[^\}]*\}([^};][^}]*\}+)*;", RegexOptions.Compiled));
            Tokens.Add(TokenType.CODEBLOCK);

            Patterns.Add(TokenType.COMMA, new Regex(@",", RegexOptions.Compiled));
            Tokens.Add(TokenType.COMMA);

            Patterns.Add(TokenType.SQUAREOPEN, new Regex(@"\[", RegexOptions.Compiled));
            Tokens.Add(TokenType.SQUAREOPEN);

            Patterns.Add(TokenType.SQUARECLOSE, new Regex(@"\]", RegexOptions.Compiled));
            Tokens.Add(TokenType.SQUARECLOSE);

            // Operators.
            Patterns.Add(TokenType.ASSIGN, new Regex(@"=", RegexOptions.Compiled));
            Tokens.Add(TokenType.ASSIGN);

            Patterns.Add(TokenType.PIPE, new Regex(@"\|", RegexOptions.Compiled));
            Tokens.Add(TokenType.PIPE);

            Patterns.Add(TokenType.SEMICOLON, new Regex(@";", RegexOptions.Compiled));
            Tokens.Add(TokenType.SEMICOLON);

            Patterns.Add(TokenType.UNARYOPER, new Regex(@"(\*|\+|\?)", RegexOptions.Compiled));
            Tokens.Add(TokenType.UNARYOPER);

            // Identifiers and numeric literals.
            Patterns.Add(TokenType.IDENTIFIER, new Regex(@"[a-zA-Z_][a-zA-Z0-9_]*", RegexOptions.Compiled));
            Tokens.Add(TokenType.IDENTIFIER);

            Patterns.Add(TokenType.INTEGER, new Regex(@"[0-9]+", RegexOptions.Compiled));
            Tokens.Add(TokenType.INTEGER);

            Patterns.Add(TokenType.DOUBLE, new Regex(@"[0-9]*\.[0-9]+", RegexOptions.Compiled));
            Tokens.Add(TokenType.DOUBLE);

            Patterns.Add(TokenType.HEX, new Regex(@"(0x[0-9a-fA-F]{6})", RegexOptions.Compiled));
            Tokens.Add(TokenType.HEX);

            // Production arrow and directive markers.
            Patterns.Add(TokenType.ARROW, new Regex(@"->", RegexOptions.Compiled));
            Tokens.Add(TokenType.ARROW);

            Patterns.Add(TokenType.DIRECTIVEOPEN, new Regex(@"<%\s*@", RegexOptions.Compiled));
            Tokens.Add(TokenType.DIRECTIVEOPEN);

            Patterns.Add(TokenType.DIRECTIVECLOSE, new Regex(@"%>", RegexOptions.Compiled));
            Tokens.Add(TokenType.DIRECTIVECLOSE);

            // End of input and string literals.
            Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
            Tokens.Add(TokenType.EOF);

            Patterns.Add(TokenType.STRING, new Regex(@"@?\""(\""\""|[^\""])*\""", RegexOptions.Compiled));
            Tokens.Add(TokenType.STRING);

            // Skippable tokens.
            Patterns.Add(TokenType.WHITESPACE, new Regex(@"\s+", RegexOptions.Compiled));
            Tokens.Add(TokenType.WHITESPACE);

            Patterns.Add(TokenType.COMMENTLINE, new Regex(@"//[^\n]*\n?", RegexOptions.Compiled));
            Tokens.Add(TokenType.COMMENTLINE);

            Patterns.Add(TokenType.COMMENTBLOCK, new Regex(@"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/", RegexOptions.Compiled));
            Tokens.Add(TokenType.COMMENTBLOCK);
        }
Beispiel #9
0
 /// <summary>
 /// Fetches the next token via <c>LookAhead</c>, advances the scanner past
 /// it, and updates the current line and file from the consumed token.
 /// </summary>
 /// <param name="expectedtokens">Token types passed through to <c>LookAhead</c>.</param>
 /// <returns>The token that was consumed.</returns>
 public Token Scan(params TokenType[] expectedtokens)
 {
     Token consumed = LookAhead(expectedtokens);

     // Clear the cached lookahead so the next call scans fresh input.
     LookAheadToken = null;

     // Move the scan window to just behind the consumed token.
     StartPos = consumed.EndPos;
     EndPos = consumed.EndPos;

     // The line counter moves forward by the number of '\n' characters
     // contained in the token's text.
     int newlineCount = consumed.Text.Length - consumed.Text.Replace("\n", "").Length;
     CurrentLine = consumed.Line + newlineCount;
     CurrentFile = consumed.File;

     return consumed;
 }
Beispiel #10
0
        /// <summary>
        /// Returns the next non-skipped token without advancing the scanner's
        /// StartPos/EndPos/CurrentLine/CurrentFile. Of all candidate token types
        /// the longest match anchored at the current position wins; matches of
        /// equal length go to the lower TokenType value. The returned token is
        /// stamped with file, line and (when in range) column information.
        /// </summary>
        /// <param name="expectedtokens">
        /// Token types to try. When empty, every registered token type is tried;
        /// otherwise the skip-list types are tried in addition to the given ones.
        /// </param>
        /// <returns>
        /// The token found at the current position; its Type is only assigned
        /// when some pattern matched.
        /// </returns>
        public Token LookAhead(params TokenType[] expectedtokens)
        {
            int i;
            // Work on local copies so that peeking does not move the scanner itself.
            int startpos = StartPos;
            int endpos = EndPos;
            int currentline = CurrentLine;
            string currentFile = CurrentFile;
            Token tok = null;
            List<TokenType> scantokens;

            // this prevents double scanning and matching
            // increased performance
            if (LookAheadToken != null
                && LookAheadToken.Type != TokenType._UNDETERMINED_
                && LookAheadToken.Type != TokenType._NONE_) return LookAheadToken;

            // if no scantokens specified, then scan for all of them (= backward compatible)
            if (expectedtokens.Length == 0)
                scantokens = Tokens;
            else
            {
                // Skip-list types are always candidates, so they can be stepped over.
                scantokens = new List<TokenType>(expectedtokens);
                scantokens.AddRange(SkipList);
            }

            do
            {

                // len == -1 means "nothing matched yet"; index starts above every
                // real token type so that any first match replaces it.
                int len = -1;
                TokenType index = (TokenType)int.MaxValue;
                string input = Input.Substring(startpos);

                tok = new Token(startpos, endpos);

                for (i = 0; i < scantokens.Count; i++)
                {
                    Regex r = Patterns[scantokens[i]];
                    Match m = r.Match(input);
                    // Accept only matches anchored at the current position; prefer the
                    // longest one, and on equal length the lower TokenType value.
                    if (m.Success && m.Index == 0 && ((m.Length > len) || (scantokens[i] < index && m.Length == len )))
                    {
                        len = m.Length;
                        index = scantokens[i];
                    }
                }

                if (index >= 0 && len >= 0)
                {
                    tok.EndPos = startpos + len;
                    tok.Text = Input.Substring(tok.StartPos, len);
                    tok.Type = index;
                }
                else if (tok.StartPos < tok.EndPos - 1)
                {
                    // No pattern matched: expose a single character as the token text.
                    tok.Text = Input.Substring(tok.StartPos, 1);
                }

                // Update the line and column count for error reporting.
                tok.File = currentFile;
                tok.Line = currentline;
                // Column is the distance back to the nearest '\n' at or before StartPos;
                // LastIndexOf returns -1 when there is none, which makes the first
                // line's columns 1-based.
                if (tok.StartPos < Input.Length)
                    tok.Column = tok.StartPos - Input.LastIndexOf('\n', tok.StartPos);

                if (SkipList.Contains(tok.Type))
                {
                    // Step over the skipped token and advance the local line counter
                    // by the number of '\n' characters it contains.
                    startpos = tok.EndPos;
                    endpos = tok.EndPos;
                    currentline = tok.Line + (tok.Text.Length - tok.Text.Replace("\n", "").Length);
                    currentFile = tok.File;
                    Skipped.Add(tok);
                }
                else
                {
                    // only assign to non-skipped tokens
                    tok.Skipped = Skipped; // assign prior skips to this token
                    Skipped = new List<Token>(); //reset skips
                }

                // Check to see if the parsed token wants to
                // alter the file and line number.
                // NOTE(review): FileAndLine is declared outside this block; the values
                // set here live in locals, so they only influence later tokens when
                // the loop runs again (i.e. when this token type is on the SkipList).
                if (tok.Type == FileAndLine)
                {
                    var match = Patterns[tok.Type].Match(tok.Text);
                    var fileMatch = match.Groups["File"];
                    if (fileMatch.Success)
                        currentFile = fileMatch.Value;
                    var lineMatch = match.Groups["Line"];
                    if (lineMatch.Success)
                        currentline = int.Parse(lineMatch.Value);
                }
            }
            while (SkipList.Contains(tok.Type));

            // Cache the result so a subsequent LookAhead/Scan can reuse it.
            LookAheadToken = tok;
            return tok;
        }
Beispiel #11
0
 /// <summary>
 /// Points the scanner at a new input string, records the file name it
 /// came from, and resets all position counters and the cached lookahead.
 /// </summary>
 /// <param name="input">Text to be scanned.</param>
 /// <param name="fileName">Name stored as the current file; defaults to an empty string.</param>
 public void Init(string input, string fileName = "")
 {
     Input = input;

     // Scan window back to the very beginning.
     StartPos = 0;
     EndPos = 0;

     // Line and column start at 1; the absolute position starts at 0.
     CurrentFile = fileName;
     CurrentLine = 1;
     CurrentColumn = 1;
     CurrentPosition = 0;

     // Any lookahead from a previous input is stale now.
     LookAheadToken = null;
 }
Beispiel #12
0
        /// <summary>
        /// Sets up the token table for the filter-expression grammar: each token
        /// type gets a compiled regular expression in <c>Patterns</c> and an entry
        /// in <c>Tokens</c>, in the order listed below. WHITESPACE is the only
        /// token type put on the skip list.
        /// </summary>
        public Scanner()
        {
            Patterns = new Dictionary<TokenType, Regex>();
            Tokens = new List<TokenType>();
            LookAheadToken = null;
            Skipped = new List<Token>();

            SkipList = new List<TokenType> { TokenType.WHITESPACE };

            // Grouping and end of input.
            Patterns.Add(TokenType.BROPEN, new Regex(@"\(", RegexOptions.Compiled));
            Tokens.Add(TokenType.BROPEN);

            Patterns.Add(TokenType.BRCLOSE, new Regex(@"\)", RegexOptions.Compiled));
            Tokens.Add(TokenType.BRCLOSE);

            Patterns.Add(TokenType.EOF, new Regex(@"^$", RegexOptions.Compiled));
            Tokens.Add(TokenType.EOF);

            // Comparison and logical operators.
            Patterns.Add(TokenType.EQ, new Regex("==", RegexOptions.Compiled));
            Tokens.Add(TokenType.EQ);

            Patterns.Add(TokenType.NEQ, new Regex("!=", RegexOptions.Compiled));
            Tokens.Add(TokenType.NEQ);

            Patterns.Add(TokenType.CONTAINS, new Regex("contains", RegexOptions.Compiled));
            Tokens.Add(TokenType.CONTAINS);

            Patterns.Add(TokenType.AND, new Regex("&&", RegexOptions.Compiled));
            Tokens.Add(TokenType.AND);

            Patterns.Add(TokenType.OR, new Regex("\\|\\|", RegexOptions.Compiled));
            Tokens.Add(TokenType.OR);

            // Operands and separator.
            Patterns.Add(TokenType.NAME, new Regex(@"\w+", RegexOptions.Compiled));
            Tokens.Add(TokenType.NAME);

            Patterns.Add(TokenType.VALUE, new Regex(@"""[^""]*""", RegexOptions.Compiled));
            Tokens.Add(TokenType.VALUE);

            Patterns.Add(TokenType.SEP, new Regex(";", RegexOptions.Compiled));
            Tokens.Add(TokenType.SEP);

            // Action keywords.
            Patterns.Add(TokenType.WAS, new Regex("Was", RegexOptions.Compiled));
            Tokens.Add(TokenType.WAS);

            Patterns.Add(TokenType.OBSOLETE, new Regex("Obsolete", RegexOptions.Compiled));
            Tokens.Add(TokenType.OBSOLETE);

            Patterns.Add(TokenType.DELETE, new Regex("Delete", RegexOptions.Compiled));
            Tokens.Add(TokenType.DELETE);

            // Skippable whitespace.
            Patterns.Add(TokenType.WHITESPACE, new Regex(@"\s+", RegexOptions.Compiled));
            Tokens.Add(TokenType.WHITESPACE);
        }
Beispiel #13
0
        /// <summary>
        /// Returns the next non-skipped token without advancing the scanner.
        /// Of all candidate token types the longest match anchored at the current
        /// position wins; on equal length the candidate that comes first in
        /// <paramref name="scantokens"/> is kept.
        /// </summary>
        /// <param name="scantokens">
        /// Token types to try. When empty, every registered token type is tried.
        /// </param>
        /// <returns>
        /// The token found at the current position; its Type is only assigned
        /// when some pattern matched.
        /// </returns>
        public Token LookAhead(params TokenType[] scantokens)
        {
            int len;
            int index;
            int i;
            int startpos = StartPos;
            Token tok = null;

            // this prevents double scanning and matching
            // increased performance
            if (LookAheadToken != null
                && LookAheadToken.Type != TokenType._UNDETERMINED_
                && LookAheadToken.Type != TokenType._NONE_) return LookAheadToken;

            // if no scantokens specified, then scan for all of them (= backward compatible)
            if (scantokens.Length == 0)
                scantokens = Tokens.ToArray();

            bool isSkipToken;
            do
            {
                // Reset both "best match" trackers on every pass so that a match
                // from a previously skipped token can never leak into this one.
                len = -1;
                index = -1;
                string input = Input.Substring(startpos);

                tok = new Token(startpos, EndPos);

                for (i = 0; i < scantokens.Length; i++)
                {
                    Regex r = Patterns[scantokens[i]];
                    Match m = r.Match(input);
                    // Only matches anchored at the current position count; a strictly
                    // longer match replaces the current best.
                    if (m.Success && m.Index == 0 && m.Length > len)
                    {
                        len = m.Length;
                        index = i;
                    }
                }

                if (index >= 0 && len >= 0)
                {
                    tok.EndPos = startpos + len;
                    tok.Text = Input.Substring(tok.StartPos, len);
                    tok.Type = scantokens[index];
                }
                else
                {
                    // No pattern matched: expose the single offending character.
                    // tok.StartPos may already sit at the end of the input after
                    // trailing skip tokens were stepped over, so it has to be
                    // bounds-checked as well; otherwise Substring throws
                    // ArgumentOutOfRangeException.
                    if (tok.EndPos < Input.Length && tok.StartPos < Input.Length)
                        tok.Text = Input.Substring(tok.StartPos, 1);
                }

                isSkipToken = SkipList.Contains(tok.Type);
                if (isSkipToken)
                {
                    // Step over the skipped token and remember it.
                    startpos = tok.EndPos;
                    Skipped.Add(tok);
                }
            }
            while (isSkipToken);

            LookAheadToken = tok;
            return tok;
        }