private static void TestCompiler()
{
    foreach (var regex in new[]
    {
        @"<(\?(php)?)|%",
        @"<(\?|%)=",
        @"(<>)|(!=)",
        @"\?|:",
        //@"\r|\n|\t|\v|\s",
        @"(\?|%)>",
    })
    {
        var regexLexer = new RegexLexer(regex);
        var tokens = regexLexer.GetTokens();
        var parser = new RegexParser(tokens.ToArray());
        var ast = parser.Parse();
        var compiler = new RegexCompiler(ast);
        var strings = compiler.ExpandRegex();
        Console.WriteLine("Regex:\r\n {0}\r\nExpanded:", regex);

        foreach (var s in strings)
        {
            Console.WriteLine(" {0}", s);
        }

        Console.WriteLine();
    }
}
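// For reference, RegexCompiler.ExpandRegex enumerates the finite set of
// strings a pattern matches (the patterns here are all finite; note the
// commented-out character-class pattern above, which presumably is not
// supported). The first pattern should print something along these lines
// (illustrative output, not captured from a real run; enumeration order
// may differ):
//
//   Regex:
//    <(\?(php)?)|%
//   Expanded:
//    <?
//    <?php
//    %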
public static string Interpret(string file)
{
    var tokenTable = new TokenTable();
    var lexFileMode = LexFileMode.Normal;
    var llText = File.ReadAllText(file);
    var modeRegex = new Regex(@"^[a-zA-Z_][0-9a-zA-Z_-]*$");
    var tokenRegex = new Regex(
        @"^([^\s]+)\s+(([a-zA-Z_][0-9a-zA-Z_-]*)|(%%))(\s+([a-zA-Z_][0-9a-zA-Z_-]*))?$");

    var lines = llText
        .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(x => x.Trim());

    var lexerMode = 0;
    var lexerModes = new Dictionary<string, int>();
    var tokenTypes = new List<string>();
    var keywords = new List<string>();

    // The first line of the .llex file is the fully-qualified lexer name;
    // the segment after the last dot names the generated types.
    string llName = lines.First();
    lines = lines.Skip(1);
    var lastDot = llName.LastIndexOf('.');
    string ns = llName.Remove(lastDot);
    string name = llName.Substring(lastDot + 1);
    string lexerName = name + "Lexer";
    string tokenName = name + "Token";
    string tokenTypeName = name + "TokenType";

    string codeLexeme = null;
    string code = null;
    string buffer = null;
    string keywordDefault = "";
    string keywordTail = "";

    Action<string> registerMode = x =>
    {
        if (!lexerModes.ContainsKey(x))
        {
            lexerModes.Add(x, lexerMode++);
        }
    };

    foreach (var line in lines)
    {
        if (lexFileMode == LexFileMode.Normal)
        {
            Match m = null;

            if (modeRegex.IsMatch(line))
            {
                registerMode(line);
                tokenTable.SetMode(lexerModes[line]);
            }
            else if (line == KeywordDelimiter)
            {
                lexFileMode = LexFileMode.Keyword;
                continue;
            }
            else if (line == CodeDelimiter)
            {
                lexFileMode = LexFileMode.Code;
                codeLexeme = null;
                code = "";
                continue;
            }
            // Match() never returns null, so test Success rather than the
            // original null check, which always passed.
            else if ((m = tokenRegex.Match(line)).Success)
            {
                // Token line: "regex TokenType [newMode]". The regex is
                // expanded into every string it matches, and each lexeme
                // gets its own table entry.
                var regex = m.Groups[1].Value;
                var regexLexer = new RegexLexer(regex);
                var tokens = regexLexer.GetTokens();
                var parser = new RegexParser(tokens.ToArray());
                var ast = parser.Parse();
                var compiler = new RegexCompiler(ast);
                var strings = compiler.ExpandRegex();

                foreach (var lexeme in strings)
                {
                    var tokenType = m.Groups[2].Value;

                    if (tokenType == CodeDelimiter)
                    {
                        codeLexeme = lexeme;
                        code = "";
                        lexFileMode = LexFileMode.Code;
                        continue;
                    }
                    else if (!tokenTypes.Contains(tokenType))
                    {
                        tokenTypes.Add(tokenType);
                    }

                    var newMode = m.Groups[6].Value;

                    if (!string.IsNullOrEmpty(newMode))
                    {
                        registerMode(newMode);
                        tokenTable.Add(lexeme, tokenType, lexerModes[newMode]);
                    }
                    else
                    {
                        tokenTable.Add(lexeme, tokenType);
                    }
                }
            }
        }
        else if (lexFileMode == LexFileMode.Code)
        {
            if (line == CodeDelimiter)
            {
                if (codeLexeme != null)
                {
                    tokenTable.AddLexemeCode(codeLexeme, code);
                }
                else
                {
                    tokenTable.AddCode(code);
                }

                lexFileMode = LexFileMode.Normal;
                continue;
            }
            else
            {
                code += line + "\r\n";
            }
        }
        else if (lexFileMode == LexFileMode.Keyword)
        {
            if (line == KeywordDelimiter)
            {
                lexFileMode = LexFileMode.Normal;
                continue;
            }
            else if (line == CodeDelimiter)
            {
                lexFileMode = LexFileMode.KeywordDefault;
                continue;
            }
            else if (line != "")
            {
                keywords.Add(line);
                tokenTable.AddKeyword(line);
            }
        }
        else if (lexFileMode == LexFileMode.KeywordDefault)
        {
            if (line == CodeDelimiter)
            {
                // The first code block inside the keyword section becomes
                // the keyword-default handler; a second one becomes the
                // keyword tail.
                if (string.IsNullOrEmpty(keywordDefault))
                {
                    keywordDefault = buffer;
                }
                else
                {
                    keywordTail = buffer;
                }

                buffer = "";
                lexFileMode = LexFileMode.Keyword;
                continue;
            }
            else
            {
                buffer += line + "\r\n";
            }
        }
    }

    foreach (var keyword in keywords)
    {
        var t = keyword + "Keyword";

        // keywordTail is initialized to "", so an empty check (rather than
        // the original null check, which always passed) decides whether a
        // tail handler was supplied.
        if (!string.IsNullOrEmpty(keywordTail))
        {
            tokenTable.AddLexemeCode(keyword, keywordTail.Replace("{Keyword}", t));
        }
        else
        {
            tokenTable.Add(keyword, t);
        }
    }

    if (!string.IsNullOrEmpty(keywordDefault))
    {
        // Generate every proper prefix of every keyword (e.g. "f" and "fo"
        // for "for") and attach the default handler to any prefix that is
        // not already a lexeme in the table.
        var k = keywords
            .SelectMany(x => Enumerable
                .Range(1, x.Length - 1)
                .Select(y => x.Remove(y))
                .ToArray())
            .Distinct()
            .ToArray();

        foreach (var i in k)
        {
            if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
            {
                continue;
            }

            tokenTable.AddLexemeCode(i, keywordDefault);
        }
    }

    //var tuples = tokenTable.Lists[1]
    //    .Where(x => x.TokenType != "None" && x.NewMode == null)
    //    .Concat(tokenTable.Keywords
    //        .Select(y => new TokenEntry(y, y + "Keyword")))
    //    .Select(x => string.Format(
    //        "Tuple.Create(TokenType.{0},\"{1}\"),",
    //        x.TokenType,
    //        Char.IsWhiteSpace(x.Lexeme[0]) ? string.Format("\\x{0:X2}", (int)x.Lexeme[0]) :
    //        x.Lexeme == "\\" ? "\\\\" :
    //        x.Lexeme))
    //    .Aggregate((x, y) => x + "\r\n" + y);

    var generator = new LexerGenerator(tokenTable);
    var lexer = generator.Generate();

    return lexer
        .Replace("{Lexer}", lexerName)
        .Replace("{Token}", tokenName)
        .Replace("{TokenType}", tokenTypeName)
        .Replace("{LexerNamespace}", ns);
}
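// Working backwards from the parser above, a plain-text .llex file starts
// with the fully-qualified lexer name, followed by mode names, token lines
// of the form "regex TokenType [newMode]", and keyword/code sections
// bracketed by the KeywordDelimiter and CodeDelimiter constants (defined
// elsewhere in the class; only "%%" for the code delimiter can be inferred
// from the (%%) alternative in tokenRegex). A hypothetical sketch, with
// invented mode and token names:
//
//   My.Namespace.Template
//   Text
//   <(\?(php)?)|%   BeginCode         Code
//   Code
//   (\?|%)>         EndCode           Text
//   (<>)|(!=)       NotEqualOperator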
public static string Interpret(string file)
{
    // Run the .llex file as an Aphid script and bind its return value to a
    // strongly-typed LLexFile model, replacing the hand-rolled line parser
    // above.
    var interpreter = new AphidInterpreter();
    interpreter.InterpretFile(file);
    var retVal = interpreter.GetReturnValue();
    var llexFile = new LLexFile();
    retVal.Bind(llexFile);

    var tokenTable = new TokenTable();
    tokenTable.Ignore = llexFile.Ignore;
    var nameInfo = LLexNameInfo.Parse(llexFile.Name);
    int z = 0;
    var modeTable = llexFile.Modes.ToDictionary(x => x.Mode, x => z++);
    var tokenTypes = new List<string>();

    foreach (var mode in llexFile.Modes)
    {
        tokenTable.SetMode(modeTable[mode.Mode]);

        foreach (var token in mode.Tokens)
        {
            if (token.Regex != null)
            {
                var regexLexer = new RegexLexer(token.Regex);
                var tokens = regexLexer.GetTokens();
                var parser = new RegexParser(tokens.ToArray());
                var ast = parser.Parse();
                var compiler = new RegexCompiler(ast);
                var strings = compiler.ExpandRegex();

                foreach (var l in strings)
                {
                    if (token.Code != null)
                    {
                        tokenTable.AddLexemeCode(l, token.Code);
                        continue;
                    }

                    if (!tokenTypes.Contains(token.TokenType))
                    {
                        tokenTypes.Add(token.TokenType);
                    }

                    if (!string.IsNullOrEmpty(token.NewMode))
                    {
                        tokenTable.Add(l, token.TokenType, modeTable[token.NewMode]);
                    }
                    else
                    {
                        tokenTable.Add(l, token.TokenType);
                    }
                }
            }
            else if (token.Code != null)
            {
                tokenTable.AddCode(token.Code);
            }
        }

        foreach (var keyword in mode.Keywords)
        {
            tokenTable.AddKeyword(keyword);
            var t = keyword + "Keyword";

            if (mode.KeywordTail != null)
            {
                tokenTable.AddLexemeCode(keyword, mode.KeywordTail.Replace("{Keyword}", t));
            }
            else
            {
                tokenTable.Add(keyword, t);
            }
        }

        if (!string.IsNullOrEmpty(mode.KeywordDefault))
        {
            // Attach the default handler to every proper keyword prefix
            // (e.g. "f" and "fo" for "for") that is not already a lexeme
            // in the table.
            var k = mode.Keywords
                .SelectMany(x => Enumerable
                    .Range(1, x.Length - 1)
                    .Select(y => x.Remove(y))
                    .ToArray())
                .Distinct()
                .ToArray();

            foreach (var i in k)
            {
                if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
                {
                    continue;
                }

                tokenTable.AddLexemeCode(i, mode.KeywordDefault);
            }
        }
    }

    var generator = new LexerGenerator(tokenTable) { IgnoreCase = llexFile.IgnoreCase };
    var lexer = generator.Generate();

    return lexer
        .Replace("{Lexer}", nameInfo.LexerName)
        .Replace("{Token}", nameInfo.TokenName)
        .Replace("{TokenType}", nameInfo.TokenTypeName)
        .Replace("{LexerNamespace}", nameInfo.Namespace);
}
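// The LLexFile model bound from the script's return value is not shown
// here; judging purely from the property accesses above, its shape is
// presumably along these lines (a hypothetical reconstruction, not the
// actual source):
public class LLexFile
{
    public string Name { get; set; }    // parsed by LLexNameInfo.Parse
    public string Ignore { get; set; }  // type is a guess; only ever assigned to TokenTable.Ignore
    public bool IgnoreCase { get; set; }
    public LLexMode[] Modes { get; set; }
}

public class LLexMode
{
    public string Mode { get; set; }
    public LLexToken[] Tokens { get; set; }
    public string[] Keywords { get; set; }
    public string KeywordTail { get; set; }
    public string KeywordDefault { get; set; }
}

public class LLexToken
{
    public string Regex { get; set; }
    public string TokenType { get; set; }
    public string NewMode { get; set; }
    public string Code { get; set; }
}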
public static string From(AphidObject retVal)
{
    var llexFile = new LLexFile();
    retVal.Bind(llexFile);
    var tokenTable = new TokenTable { Ignore = llexFile.Ignore };
    var nameInfo = LLexNameInfo.Parse(llexFile.Name);
    var z = 0;
    var modeTable = llexFile.Modes.ToDictionary(x => x.Mode, x => z++);
    var tokenTypes = new List<string>();

    foreach (var mode in llexFile.Modes)
    {
        tokenTable.SetMode(modeTable[mode.Mode]);

        foreach (var token in mode.Tokens)
        {
            if (token.Regex != null)
            {
                var regexLexer = new RegexLexer(token.Regex);
                var tokens = regexLexer.GetTokens();
                var parser = new RegexParser(tokens.ToArray());
                var ast = parser.Parse();
                var compiler = new RegexCompiler(ast);
                var strings = compiler.ExpandRegex();

                foreach (var l in strings)
                {
                    if (token.Code != null)
                    {
                        tokenTable.AddLexemeCode(l, token.Code);
                        continue;
                    }

                    if (!tokenTypes.Contains(token.TokenType))
                    {
                        tokenTypes.Add(token.TokenType);
                    }

                    if (!string.IsNullOrEmpty(token.NewMode))
                    {
                        tokenTable.Add(l, token.TokenType, modeTable[token.NewMode]);
                    }
                    else
                    {
                        tokenTable.Add(l, token.TokenType);
                    }
                }
            }
            else if (token.Code != null)
            {
                tokenTable.AddCode(token.Code);
            }
            else if (token.TokenType != null)
            {
                // A bare token type compiles to code that simply returns it.
                tokenTable.AddCode("return {TokenType}." + token.TokenType + ";\r\n");
            }
            else
            {
                throw new NotImplementedException(
                    "Token with no regex, code, or type not supported.");
            }
        }

        foreach (var keyword in mode.Keywords ?? Array.Empty<string>())
        {
            tokenTable.AddKeyword(keyword);
            var t = keyword + "Keyword";

            if (mode.KeywordTail != null)
            {
                tokenTable.AddLexemeCode(keyword, mode.KeywordTail.Replace("{Keyword}", t));
            }
            else
            {
                tokenTable.Add(keyword, t);
            }
        }

        if (!string.IsNullOrEmpty(mode.KeywordDefault))
        {
            // Attach the default handler to every proper keyword prefix
            // that is not already a lexeme in the table.
            var k = mode.Keywords
                .SelectMany(x => Enumerable
                    .Range(1, x.Length - 1)
                    .Select(y => x.Remove(y))
                    .ToArray())
                .Distinct()
                .ToArray();

            foreach (var i in k)
            {
                if (tokenTable.Lists.Any(x => x.Value.Any(y => y.Lexeme == i)))
                {
                    continue;
                }

                tokenTable.AddLexemeCode(i, mode.KeywordDefault);
            }
        }
    }

    var generator = new LexerGenerator(tokenTable) { IgnoreCase = llexFile.IgnoreCase };
    var lexer = generator.Generate();

    return lexer
        .Replace("{Lexer}", nameInfo.LexerName)
        .Replace("{Token}", nameInfo.TokenName)
        .Replace("{TokenType}", nameInfo.TokenTypeName)
        .Replace("{LexerNamespace}", nameInfo.Namespace);
}
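// Taking an AphidObject rather than a file path lets callers reuse an
// interpreter instance they already have. A minimal usage sketch, assuming
// the containing class is named LLexInterpreter (hypothetical) and
// mirroring the Interpret(file) overload above:
var interpreter = new AphidInterpreter();
interpreter.InterpretFile("MyLexer.llex");
var lexerSource = LLexInterpreter.From(interpreter.GetReturnValue());
File.WriteAllText("MyLexer.g.cs", lexerSource);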