public static string Interpret(string file)
        {
            // Interprets a plain-text .llex lexer definition file and returns
            // the generated lexer source with the {Lexer}, {Token},
            // {TokenType}, and {LexerNamespace} placeholders substituted.
            var tokenTable = new TokenTable();

            var lexFileMode = LexFileMode.Normal;

            var llText = File.ReadAllText(file);

            // A bare identifier on a line by itself selects a lexer mode.
            var modeRegex = new Regex(@"^[a-zA-Z_][0-9a-zA-Z_-]*$");
            // Token rule: "<pattern> <tokenType|%%> [<newMode>]".
            var tokenRegex = new Regex(@"^([^\s]+)\s+(([a-zA-Z_][0-9a-zA-Z_-]*)|(%%))(\s+([a-zA-Z_][0-9a-zA-Z_-]*))?$");

            var lines = llText
                .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(x => x.Trim());

            var lexerMode = 0;
            var lexerModes = new Dictionary<string, int>();
            var tokenTypes = new List<string>();
            var keywords = new List<string>();

            // First line is the fully-qualified lexer name, e.g.
            // "My.Namespace.Foo" -> namespace "My.Namespace", base name "Foo".
            string llName = lines.First();
            lines = lines.Skip(1);

            var lastDot = llName.LastIndexOf('.');

            string ns = llName.Remove(lastDot);
            string name = llName.Substring(lastDot + 1);
            string lexerName = name + "Lexer";
            string tokenName = name + "Token";
            string tokenTypeName = name + "TokenType";

            string codeLexeme = null;
            string code = null;
            string buffer = null;
            string keywordDefault = "";
            string keywordTail = "";

            // Assigns the next sequential mode index to a mode name, once.
            Action<string> registerMode = x =>
            {
                if (!lexerModes.ContainsKey(x))
                    lexerModes.Add(x, lexerMode++);
            };

            foreach (var line in lines)
            {
                if (lexFileMode == LexFileMode.Normal)
                {
                    Match m;
                    if (modeRegex.IsMatch(line))
                    {
                        registerMode(line);

                        tokenTable.SetMode(lexerModes[line]);
                    }
                    else if (line == KeywordDelimiter)
                    {
                        lexFileMode = LexFileMode.Keyword;
                        continue;
                    }
                    else if (line == CodeDelimiter)
                    {
                        lexFileMode = LexFileMode.Code;
                        codeLexeme = null;
                        code = "";
                        continue;
                    }
                    // BUG FIX: Regex.Match never returns null (it returns
                    // Match.Empty on failure), so the original "!= null" test
                    // treated every remaining line as a token rule; check
                    // Success instead.
                    else if ((m = tokenRegex.Match(line)).Success)
                    {
                        var regex = m.Groups[1].Value;

                        // Expand the rule's pattern into its finite set of
                        // concrete lexemes.
                        var regexLexer = new RegexLexer(regex);
                        var tokens = regexLexer.GetTokens();
                        var parser = new RegexParser(tokens.ToArray());
                        var ast = parser.Parse();
                        var compiler = new RegexCompiler(ast);
                        var strings = compiler.ExpandRegex();

                        foreach (var lexeme in strings)
                        {
                            var tokenType = m.Groups[2].Value;

                            // "%%" as the token type attaches the following
                            // code block to this lexeme.
                            if (tokenType == CodeDelimiter)
                            {
                                codeLexeme = lexeme;
                                code = "";
                                lexFileMode = LexFileMode.Code;
                                continue;
                            }
                            else if (!tokenTypes.Contains(tokenType))
                                tokenTypes.Add(tokenType);

                            var newMode = m.Groups[6].Value;

                            if (!string.IsNullOrEmpty(newMode))
                            {
                                registerMode(newMode);

                                tokenTable.Add(lexeme, tokenType, lexerModes[newMode]);
                            }
                            else
                                tokenTable.Add(lexeme, tokenType);
                        }
                    }
                }
                else if (lexFileMode == LexFileMode.Code)
                {
                    // Accumulate code lines until the closing delimiter.
                    if (line == CodeDelimiter)
                    {
                        if (codeLexeme != null)
                            tokenTable.AddLexemeCode(codeLexeme, code);
                        else
                            tokenTable.AddCode(code);

                        lexFileMode = LexFileMode.Normal;
                        continue;
                    }
                    else
                        code += line + "\r\n";
                }
                else if (lexFileMode == LexFileMode.Keyword)
                {
                    if (line == KeywordDelimiter)
                    {
                        lexFileMode = LexFileMode.Normal;
                        continue;
                    }
                    else if (line == CodeDelimiter)
                    {
                        lexFileMode = LexFileMode.KeywordDefault;
                        continue;
                    }
                    else if (line != "")
                    {
                        keywords.Add(line);
                        tokenTable.AddKeyword(line);
                    }
                }
                else if (lexFileMode == LexFileMode.KeywordDefault)
                {
                    if (line == CodeDelimiter)
                    {
                        // First code block is the keyword default handler,
                        // the second is the per-keyword tail template.
                        if (string.IsNullOrEmpty(keywordDefault))
                        {
                            keywordDefault = buffer;
                        }
                        else
                        {
                            keywordTail = buffer;
                        }

                        buffer = "";

                        lexFileMode = LexFileMode.Keyword;
                        continue;
                    }
                    else
                    {
                        buffer += line + "\r\n";
                    }
                }
            }

            foreach (var keyword in keywords)
            {
                var t = keyword + "Keyword";

                // BUG FIX: keywordTail is initialized to "" and never becomes
                // null, so the plain-token branch below was unreachable and
                // keywords without a tail template got empty code handlers;
                // test for empty instead.
                if (!string.IsNullOrEmpty(keywordTail))
                {
                    tokenTable.AddLexemeCode(keyword, keywordTail.Replace("{Keyword}", t));
                }
                else
                {
                    tokenTable.Add(keyword, t);
                }
            }

            if (!string.IsNullOrEmpty(keywordDefault))
            {
                // Every strict prefix of every keyword that is not already a
                // known lexeme receives the default handler.
                var k = keywords
                    .SelectMany(x => Enumerable
                        .Range(1, x.Length - 1)
                        .Select(y => x.Remove(y))
                        .ToArray())
                    .Distinct()
                    .ToArray();

                foreach (var i in k)
                {
                    if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
                    {
                        continue;
                    }
                    tokenTable.AddLexemeCode(i, keywordDefault);
                }
            }

            var generator = new LexerGenerator(tokenTable);
            var lexer = generator.Generate();

            return lexer
                .Replace("{Lexer}", lexerName)
                .Replace("{Token}", tokenName)
                .Replace("{TokenType}", tokenTypeName)
                .Replace("{LexerNamespace}", ns);
        }
Example #2
0
        public static string Interpret(string file)
        {
            // Runs the Aphid script in the given file, binds its return value
            // onto an LLexFile model, and emits the generated lexer source
            // with the name placeholders filled in.
            var interpreter = new AphidInterpreter();
            interpreter.InterpretFile(file);

            var returnValue = interpreter.GetReturnValue();
            var llexFile = new LLexFile();
            returnValue.Bind(llexFile);

            var tokenTable = new TokenTable { Ignore = llexFile.Ignore };
            var nameInfo = LLexNameInfo.Parse(llexFile.Name);

            // Each mode gets a sequential index in declaration order.
            var nextModeIndex = 0;
            var modeIndices = llexFile.Modes.ToDictionary(x => x.Mode, x => nextModeIndex++);
            var seenTokenTypes = new List<string>();

            foreach (var mode in llexFile.Modes)
            {
                tokenTable.SetMode(modeIndices[mode.Mode]);

                foreach (var token in mode.Tokens)
                {
                    if (token.Regex == null)
                    {
                        // No pattern: a bare code entry for this mode.
                        if (token.Code != null)
                        {
                            tokenTable.AddCode(token.Code);
                        }

                        continue;
                    }

                    // Expand the pattern into its concrete lexemes.
                    var regexTokens = new RegexLexer(token.Regex).GetTokens();
                    var regexAst = new RegexParser(regexTokens.ToArray()).Parse();
                    var lexemes = new RegexCompiler(regexAst).ExpandRegex();

                    foreach (var lexeme in lexemes)
                    {
                        if (token.Code != null)
                        {
                            tokenTable.AddLexemeCode(lexeme, token.Code);
                            continue;
                        }

                        if (!seenTokenTypes.Contains(token.TokenType))
                        {
                            seenTokenTypes.Add(token.TokenType);
                        }

                        if (string.IsNullOrEmpty(token.NewMode))
                        {
                            tokenTable.Add(lexeme, token.TokenType);
                        }
                        else
                        {
                            tokenTable.Add(lexeme, token.TokenType, modeIndices[token.NewMode]);
                        }
                    }
                }

                foreach (var keyword in mode.Keywords)
                {
                    tokenTable.AddKeyword(keyword);
                    var keywordTokenType = keyword + "Keyword";

                    if (mode.KeywordTail == null)
                    {
                        tokenTable.Add(keyword, keywordTokenType);
                    }
                    else
                    {
                        tokenTable.AddLexemeCode(
                            keyword,
                            mode.KeywordTail.Replace("{Keyword}", keywordTokenType));
                    }
                }

                if (!string.IsNullOrEmpty(mode.KeywordDefault))
                {
                    // Every strict prefix of every keyword that is not already
                    // a known lexeme receives the default handler.
                    var prefixes = mode.Keywords
                        .SelectMany(x => Enumerable
                            .Range(1, x.Length - 1)
                            .Select(y => x.Substring(0, y)))
                        .Distinct()
                        .ToArray();

                    foreach (var prefix in prefixes)
                    {
                        var alreadyDefined = tokenTable.Lists
                            .Any(x => x.Value.Any(y => y.Lexeme == prefix));

                        if (!alreadyDefined)
                        {
                            tokenTable.AddLexemeCode(prefix, mode.KeywordDefault);
                        }
                    }
                }
            }

            var generator = new LexerGenerator(tokenTable) { IgnoreCase = llexFile.IgnoreCase };

            return generator.Generate()
                .Replace("{Lexer}", nameInfo.LexerName)
                .Replace("{Token}", nameInfo.TokenName)
                .Replace("{TokenType}", nameInfo.TokenTypeName)
                .Replace("{LexerNamespace}", nameInfo.Namespace);
        }
Example #3
0
        public static string Interpret(string file)
        {
            // Interprets a plain-text .llex lexer definition file and returns
            // the generated lexer source with the {Lexer}, {Token},
            // {TokenType}, and {LexerNamespace} placeholders substituted.
            var tokenTable = new TokenTable();

            var lexFileMode = LexFileMode.Normal;

            var llText = File.ReadAllText(file);

            // A bare identifier on a line by itself selects a lexer mode.
            var modeRegex  = new Regex(@"^[a-zA-Z_][0-9a-zA-Z_-]*$");
            // Token rule: "<pattern> <tokenType|%%> [<newMode>]".
            var tokenRegex = new Regex(@"^([^\s]+)\s+(([a-zA-Z_][0-9a-zA-Z_-]*)|(%%))(\s+([a-zA-Z_][0-9a-zA-Z_-]*))?$");

            var lines = llText
                        .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                        .Select(x => x.Trim());

            var lexerMode  = 0;
            var lexerModes = new Dictionary <string, int>();
            var tokenTypes = new List <string>();
            var keywords   = new List <string>();

            // First line is the fully-qualified lexer name, e.g.
            // "My.Namespace.Foo" -> namespace "My.Namespace", base name "Foo".
            string llName = lines.First();

            lines = lines.Skip(1);

            var lastDot = llName.LastIndexOf('.');

            string ns            = llName.Remove(lastDot);
            string name          = llName.Substring(lastDot + 1);
            string lexerName     = name + "Lexer";
            string tokenName     = name + "Token";
            string tokenTypeName = name + "TokenType";

            string codeLexeme     = null;
            string code           = null;
            string buffer         = null;
            string keywordDefault = "";
            string keywordTail    = "";

            // Assigns the next sequential mode index to a mode name, once.
            Action <string> registerMode = x =>
            {
                if (!lexerModes.ContainsKey(x))
                {
                    lexerModes.Add(x, lexerMode++);
                }
            };

            foreach (var line in lines)
            {
                if (lexFileMode == LexFileMode.Normal)
                {
                    Match m;
                    if (modeRegex.IsMatch(line))
                    {
                        registerMode(line);

                        tokenTable.SetMode(lexerModes[line]);
                    }
                    else if (line == KeywordDelimiter)
                    {
                        lexFileMode = LexFileMode.Keyword;
                        continue;
                    }
                    else if (line == CodeDelimiter)
                    {
                        lexFileMode = LexFileMode.Code;
                        codeLexeme  = null;
                        code        = "";
                        continue;
                    }
                    // BUG FIX: Regex.Match never returns null (it returns
                    // Match.Empty on failure), so the original "!= null" test
                    // treated every remaining line as a token rule; check
                    // Success instead.
                    else if ((m = tokenRegex.Match(line)).Success)
                    {
                        var regex = m.Groups[1].Value;

                        // Expand the rule's pattern into its finite set of
                        // concrete lexemes.
                        var regexLexer = new RegexLexer(regex);
                        var tokens     = regexLexer.GetTokens();
                        var parser     = new RegexParser(tokens.ToArray());
                        var ast        = parser.Parse();
                        var compiler   = new RegexCompiler(ast);
                        var strings    = compiler.ExpandRegex();

                        foreach (var lexeme in strings)
                        {
                            var tokenType = m.Groups[2].Value;

                            // "%%" as the token type attaches the following
                            // code block to this lexeme.
                            if (tokenType == CodeDelimiter)
                            {
                                codeLexeme  = lexeme;
                                code        = "";
                                lexFileMode = LexFileMode.Code;
                                continue;
                            }
                            else if (!tokenTypes.Contains(tokenType))
                            {
                                tokenTypes.Add(tokenType);
                            }

                            var newMode = m.Groups[6].Value;

                            if (!string.IsNullOrEmpty(newMode))
                            {
                                registerMode(newMode);

                                tokenTable.Add(lexeme, tokenType, lexerModes[newMode]);
                            }
                            else
                            {
                                tokenTable.Add(lexeme, tokenType);
                            }
                        }
                    }
                }
                else if (lexFileMode == LexFileMode.Code)
                {
                    // Accumulate code lines until the closing delimiter.
                    if (line == CodeDelimiter)
                    {
                        if (codeLexeme != null)
                        {
                            tokenTable.AddLexemeCode(codeLexeme, code);
                        }
                        else
                        {
                            tokenTable.AddCode(code);
                        }

                        lexFileMode = LexFileMode.Normal;
                        continue;
                    }
                    else
                    {
                        code += line + "\r\n";
                    }
                }
                else if (lexFileMode == LexFileMode.Keyword)
                {
                    if (line == KeywordDelimiter)
                    {
                        lexFileMode = LexFileMode.Normal;
                        continue;
                    }
                    else if (line == CodeDelimiter)
                    {
                        lexFileMode = LexFileMode.KeywordDefault;
                        continue;
                    }
                    else if (line != "")
                    {
                        keywords.Add(line);
                        tokenTable.AddKeyword(line);
                    }
                }
                else if (lexFileMode == LexFileMode.KeywordDefault)
                {
                    if (line == CodeDelimiter)
                    {
                        // First code block is the keyword default handler,
                        // the second is the per-keyword tail template.
                        if (string.IsNullOrEmpty(keywordDefault))
                        {
                            keywordDefault = buffer;
                        }
                        else
                        {
                            keywordTail = buffer;
                        }

                        buffer = "";

                        lexFileMode = LexFileMode.Keyword;
                        continue;
                    }
                    else
                    {
                        buffer += line + "\r\n";
                    }
                }
            }

            foreach (var keyword in keywords)
            {
                var t = keyword + "Keyword";

                // BUG FIX: keywordTail is initialized to "" and never becomes
                // null, so the plain-token branch below was unreachable and
                // keywords without a tail template got empty code handlers;
                // test for empty instead.
                if (!string.IsNullOrEmpty(keywordTail))
                {
                    tokenTable.AddLexemeCode(keyword, keywordTail.Replace("{Keyword}", t));
                }
                else
                {
                    tokenTable.Add(keyword, t);
                }
            }

            if (!string.IsNullOrEmpty(keywordDefault))
            {
                // Every strict prefix of every keyword that is not already a
                // known lexeme receives the default handler.
                var k = keywords
                        .SelectMany(x => Enumerable
                                    .Range(1, x.Length - 1)
                                    .Select(y => x.Remove(y))
                                    .ToArray())
                        .Distinct()
                        .ToArray();

                foreach (var i in k)
                {
                    if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
                    {
                        continue;
                    }
                    tokenTable.AddLexemeCode(i, keywordDefault);
                }
            }

            var generator = new LexerGenerator(tokenTable);
            var lexer     = generator.Generate();

            return(lexer
                   .Replace("{Lexer}", lexerName)
                   .Replace("{Token}", tokenName)
                   .Replace("{TokenType}", tokenTypeName)
                   .Replace("{LexerNamespace}", ns));
        }
Example #4
0
        public static string Interpret(string file)
        {
            // Executes the Aphid script in the given file, binds its return
            // value onto an LLexFile model, and emits the generated lexer
            // source with the name placeholders filled in.
            var interpreter = new AphidInterpreter();
            interpreter.InterpretFile(file);

            var scriptResult = interpreter.GetReturnValue();
            var llexFile = new LLexFile();
            scriptResult.Bind(llexFile);

            var tokenTable = new TokenTable { Ignore = llexFile.Ignore };
            var nameInfo = LLexNameInfo.Parse(llexFile.Name);

            // Each mode gets a sequential index in declaration order.
            var modeCounter = 0;
            var modeIndexByName = llexFile.Modes.ToDictionary(x => x.Mode, x => modeCounter++);
            var knownTokenTypes = new List<string>();

            foreach (var mode in llexFile.Modes)
            {
                tokenTable.SetMode(modeIndexByName[mode.Mode]);

                foreach (var token in mode.Tokens)
                {
                    if (token.Regex == null)
                    {
                        // No pattern: a bare code entry for this mode.
                        if (token.Code != null)
                        {
                            tokenTable.AddCode(token.Code);
                        }

                        continue;
                    }

                    // Expand the pattern into its concrete lexemes.
                    var patternTokens = new RegexLexer(token.Regex).GetTokens();
                    var patternAst = new RegexParser(patternTokens.ToArray()).Parse();
                    var expanded = new RegexCompiler(patternAst).ExpandRegex();

                    foreach (var lexeme in expanded)
                    {
                        if (token.Code != null)
                        {
                            tokenTable.AddLexemeCode(lexeme, token.Code);
                            continue;
                        }

                        if (!knownTokenTypes.Contains(token.TokenType))
                        {
                            knownTokenTypes.Add(token.TokenType);
                        }

                        if (string.IsNullOrEmpty(token.NewMode))
                        {
                            tokenTable.Add(lexeme, token.TokenType);
                        }
                        else
                        {
                            tokenTable.Add(lexeme, token.TokenType, modeIndexByName[token.NewMode]);
                        }
                    }
                }

                foreach (var keyword in mode.Keywords)
                {
                    tokenTable.AddKeyword(keyword);
                    var keywordType = keyword + "Keyword";

                    if (mode.KeywordTail == null)
                    {
                        tokenTable.Add(keyword, keywordType);
                    }
                    else
                    {
                        tokenTable.AddLexemeCode(
                            keyword,
                            mode.KeywordTail.Replace("{Keyword}", keywordType));
                    }
                }

                if (!string.IsNullOrEmpty(mode.KeywordDefault))
                {
                    // Every strict prefix of every keyword that is not already
                    // a known lexeme receives the default handler.
                    var keywordPrefixes = mode.Keywords
                        .SelectMany(x => Enumerable
                            .Range(1, x.Length - 1)
                            .Select(y => x.Substring(0, y)))
                        .Distinct()
                        .ToArray();

                    foreach (var prefix in keywordPrefixes)
                    {
                        var exists = tokenTable.Lists
                            .Any(x => x.Value.Any(y => y.Lexeme == prefix));

                        if (!exists)
                        {
                            tokenTable.AddLexemeCode(prefix, mode.KeywordDefault);
                        }
                    }
                }
            }

            var generator = new LexerGenerator(tokenTable);

            return generator.Generate()
                .Replace("{Lexer}", nameInfo.LexerName)
                .Replace("{Token}", nameInfo.TokenName)
                .Replace("{TokenType}", nameInfo.TokenTypeName)
                .Replace("{LexerNamespace}", nameInfo.Namespace);
        }
Example #5
0
        public static string From(AphidObject retVal)
        {
            // Binds an Aphid return value to an LLexFile model and generates
            // the lexer source with the {Lexer}, {Token}, {TokenType}, and
            // {LexerNamespace} placeholders substituted.
            var llexFile = new LLexFile();

            retVal.Bind(llexFile);
            var tokenTable = new TokenTable {
                Ignore = llexFile.Ignore
            };
            var nameInfo = LLexNameInfo.Parse(llexFile.Name);

            // Each mode gets a sequential index in declaration order.
            var z          = 0;
            var modeTable  = llexFile.Modes.ToDictionary(x => x.Mode, x => z++);
            var tokenTypes = new List <string>();

            foreach (var mode in llexFile.Modes)
            {
                tokenTable.SetMode(modeTable[mode.Mode]);
                foreach (var token in mode.Tokens)
                {
                    if (token.Regex != null)
                    {
                        // Expand the pattern into its concrete lexemes.
                        var regexLexer = new RegexLexer(token.Regex);
                        var tokens     = regexLexer.GetTokens();
                        var parser     = new RegexParser(tokens.ToArray());
                        var ast        = parser.Parse();
                        var compiler   = new RegexCompiler(ast);
                        var strings    = compiler.ExpandRegex();

                        foreach (var l in strings)
                        {
                            if (token.Code != null)
                            {
                                tokenTable.AddLexemeCode(l, token.Code);
                                continue;
                            }

                            if (!tokenTypes.Contains(token.TokenType))
                            {
                                tokenTypes.Add(token.TokenType);
                            }

                            if (!string.IsNullOrEmpty(token.NewMode))
                            {
                                tokenTable.Add(l, token.TokenType, modeTable[token.NewMode]);
                            }
                            else
                            {
                                tokenTable.Add(l, token.TokenType);
                            }
                        }
                    }
                    else if (token.Code != null)
                    {
                        // No pattern: a bare code entry for this mode.
                        tokenTable.AddCode(token.Code);
                    }
                    else if (token.TokenType != null)
                    {
                        // Type-only entry: emit a direct return of the type.
                        tokenTable.AddCode("return {TokenType}." + token.TokenType + ";\r\n");
                    }
                    else
                    {
                        throw new NotImplementedException(
                                  "Token with no regex, code, or type not supported.");
                    }
                }

                foreach (var keyword in mode.Keywords ?? Array.Empty <string>())
                {
                    tokenTable.AddKeyword(keyword);
                    var t = keyword + "Keyword";
                    if (mode.KeywordTail != null)
                    {
                        tokenTable.AddLexemeCode(keyword, mode.KeywordTail.Replace("{Keyword}", t));
                    }
                    else
                    {
                        tokenTable.Add(keyword, t);
                    }
                }

                if (!string.IsNullOrEmpty(mode.KeywordDefault))
                {
                    // BUG FIX: mirror the null guard used for the keyword loop
                    // above; a mode with a keyword default but a null Keywords
                    // list previously threw a NullReferenceException here.
                    // Every strict prefix of every keyword that is not already
                    // a known lexeme receives the default handler.
                    var k = (mode.Keywords ?? Array.Empty <string>())
                            .SelectMany(x => Enumerable
                                        .Range(1, x.Length - 1)
                                        .Select(y => x.Remove(y))
                                        .ToArray())
                            .Distinct()
                            .ToArray();

                    foreach (var i in k)
                    {
                        if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
                        {
                            continue;
                        }

                        tokenTable.AddLexemeCode(i, mode.KeywordDefault);
                    }
                }
            }

            var generator = new LexerGenerator(tokenTable)
            {
                IgnoreCase = llexFile.IgnoreCase
            };
            var lexer = generator.Generate();

            return(lexer
                   .Replace("{Lexer}", nameInfo.LexerName)
                   .Replace("{Token}", nameInfo.TokenName)
                   .Replace("{TokenType}", nameInfo.TokenTypeName)
                   .Replace("{LexerNamespace}", nameInfo.Namespace));
        }