/// <summary>
/// Creates a lexer over <paramref name="text"/> using an explicit configuration.
/// </summary>
/// <param name="text">The text to tokenize. Must contain at least one character, because the first character is loaded immediately.</param>
/// <param name="lexerConfig">The configuration stored on the lexer before the token mappings are prepared.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
public Lexer(string text, LexerConfig lexerConfig)
{
    // Fail fast with a descriptive error instead of the NullReferenceException /
    // IndexOutOfRangeException that reading text[0] would otherwise raise.
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (text.Length == 0)
    {
        throw new ArgumentException("The text to tokenize must not be empty.", "text");
    }

    _text = text;
    _position = 0;
    _currentChar = _text[0];
    _errors = new List<Exception>();
    _lexerConfig = lexerConfig;

    // Runs last: the configuration is assigned before the mappings are prepared.
    PrepareTokenMappings();
}
/// <summary>
/// Creates a lexer over <paramref name="text"/> whose configuration is obtained from
/// <c>LexerConfig.Default(mappings)</c>.
/// </summary>
/// <param name="text">The text to tokenize. Must contain at least one character, because the first character is loaded immediately.</param>
/// <param name="mappings">The token mappings handed to <c>LexerConfig.Default</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
public Lexer(string text, List<TokenMapping> mappings)
{
    // Fail fast with a descriptive error instead of the NullReferenceException /
    // IndexOutOfRangeException that reading text[0] would otherwise raise.
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (text.Length == 0)
    {
        throw new ArgumentException("The text to tokenize must not be empty.", "text");
    }

    _text = text;
    _position = 0;
    _currentChar = _text[0];
    _errors = new List<Exception>();
    _lexerConfig = LexerConfig.Default(mappings);

    // Runs last: the configuration is assigned before the mappings are prepared.
    PrepareTokenMappings();
}
/// <summary>
/// Generates a config from a map
/// </summary>
/// <remarks>
/// Reads the string entries "Namespace", "Name", "Enum", "Method" and "ExceptionMethod",
/// the boolean entry "Partial", and the nested "States" map. Every entry of "States" is
/// passed to <c>ProcessState</c> together with the list being built and the
/// instruction-name lookup table; the resulting list becomes <c>config.States</c>.
/// </remarks>
/// <param name="data">The map</param>
/// <returns>The config</returns>
public LexerConfig GenerateConfig(ConfigurationDictionary data)
{
    LexerConfig config = new LexerConfig();

    var stringData = data.Caster<string>();
    config.Namespace = stringData["Namespace"];
    config.Name = stringData["Name"];
    config.Enum = stringData["Enum"];
    config.Method = stringData["Method"];
    config.ExceptionMethod = stringData["ExceptionMethod"];
    // NOTE(review): the second argument is presumably the fallback used when "Partial" is absent - confirm against ValueAs.
    config.Partial = data.ValueAs<bool>("Partial", false);

    // Instruction names are identifiers, not linguistic text, so they are matched with an
    // ordinal case-insensitive comparer. A culture-sensitive comparer would, for example,
    // stop "inpos" from matching "InPos" under cultures with special casing rules for I.
    var instructionCodes = new SortedDictionary<string, LexerConfig.State.Case.Instruction.InstructionCode>(StringComparer.OrdinalIgnoreCase)
    {
        { "Clear", LexerConfig.State.Case.Instruction.InstructionCode.Clear },
        { "InPos", LexerConfig.State.Case.Instruction.InstructionCode.InPos },
        { "PushInput", LexerConfig.State.Case.Instruction.InstructionCode.PushInput },
        { "Push", LexerConfig.State.Case.Instruction.InstructionCode.Push },
        { "PushCode", LexerConfig.State.Case.Instruction.InstructionCode.PushCode },
        { "InCode", LexerConfig.State.Case.Instruction.InstructionCode.InCode },
        { "FMA", LexerConfig.State.Case.Instruction.InstructionCode.FMA },
        { "FSO", LexerConfig.State.Case.Instruction.InstructionCode.FSO },
        { "Code", LexerConfig.State.Case.Instruction.InstructionCode.Code },
        { "Xor", LexerConfig.State.Case.Instruction.InstructionCode.Xor },
        { "Or", LexerConfig.State.Case.Instruction.InstructionCode.Or },
        { "And", LexerConfig.State.Case.Instruction.InstructionCode.And },
        { "State", LexerConfig.State.Case.Instruction.InstructionCode.State },
        { "Die", LexerConfig.State.Case.Instruction.InstructionCode.Die },
        { "Yield", LexerConfig.State.Case.Instruction.InstructionCode.Yield },
        { "YieldInput", LexerConfig.State.Case.Instruction.InstructionCode.YieldInput },
        { "ToLower", LexerConfig.State.Case.Instruction.InstructionCode.ToLower },
        { "ToUpper", LexerConfig.State.Case.Instruction.InstructionCode.ToUpper }
    };

    var states = new List<LexerConfig.State>();
    foreach (var state in data.ValueAs<ConfigurationDictionary>("States"))
    {
        ProcessState(state, states, instructionCodes);
    }
    config.States = states.ToArray();

    return config;
}
/// <summary>
/// Creates a lexer bound to the given configuration.
/// </summary>
/// <param name="config">The configuration stored on this instance.</param>
public Lexer(LexerConfig config)
{
    this.config = config;
}
/// <summary>
/// Generates code from a configuration
/// </summary>
/// <remarks>
/// Writes a complete C# source file to <paramref name="output"/>: the using directives,
/// the namespace and class declarations, a tokenizer method that reads from a
/// <c>TextReader</c> and switches on the current state and input character, an overload
/// that starts at <c>new TokenPosition(0, 1, 1)</c>, and a private method that builds the
/// error message used by the <c>Die</c> instruction.
/// </remarks>
/// <param name="data">The configuration</param>
/// <param name="output">The output</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="output"/> is null.</exception>
/// <exception cref="Exception">A <c>State</c> instruction names a state that is not present in <c>data.States</c>.</exception>
public void GenerateLexer(LexerConfig data, TextWriter output)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (output == null)
    {
        throw new ArgumentNullException("output");
    }

    IndentedTextWriter itw = new IndentedTextWriter(output);
    itw.Indent = 0;

    string[] namespaces = new[] { "System", "System.IO", "System.Text", "System.Collections.Generic", "System.Globalization", "KaoriStudio.Core.Text.Parsing", "KaoriStudio.Core.Helpers" };
    foreach (var ns in namespaces)
    {
        itw.WriteLine("using {0};", ns);
    }

    itw.WriteLine("namespace {0}", data.Namespace);
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine(data.Partial ? "public partial class {0}" : "public class {0}", data.Name);
    itw.WriteLine("{");
    itw.Indent++;

    EmitTokenizerMethod(itw, data);
    EmitReaderOnlyOverload(itw, data);
    EmitExceptionMessageMethod(itw, data);

    // Close the class, then the namespace.
    itw.Indent--;
    itw.WriteLine("}");
    itw.Indent--;
    itw.WriteLine("}");
}

/// <summary>
/// Emits the tokenizer method: a do/while loop that reads one character per iteration and
/// dispatches on the current state index, then on the input character.
/// </summary>
/// <param name="itw">The writer receiving the generated code.</param>
/// <param name="data">The configuration supplying the enum name, method name and states.</param>
private void EmitTokenizerMethod(IndentedTextWriter itw, LexerConfig data)
{
    itw.WriteLine("public static IEnumerable<Token<{0}>> {1}(TextReader source, TokenPosition inpos)", data.Enum, data.Method);
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("int state = 0;");
    itw.WriteLine("StringBuilder buffer = new StringBuilder();");
    itw.WriteLine("TokenPosition pos = default(TokenPosition);");
    itw.WriteLine("int code = 0;");
    itw.WriteLine("int input;");
    itw.WriteLine("do");
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("input = source.Read();");
    // NOTE(review): the emitted code sets Column to 1 on '\n' and still increments it at the
    // end of the same iteration, so the character after a newline is reported at column 2
    // while the very first character is at column 1 - confirm this is intended.
    itw.WriteLine("if (input == '\\n')");
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("inpos.Column = 1;");
    itw.WriteLine("inpos.Argument++;");
    itw.Indent--;
    itw.WriteLine("}");
    itw.WriteLine("switch(state)");
    itw.WriteLine("{");
    itw.Indent++;

    // Map each state name to its position so State instructions can refer to states by name.
    // A later state with the same name overwrites the earlier index.
    SortedDictionary<string, int> stateIndexes = new SortedDictionary<string, int>();
    for (int i = 0; i < data.States.Length; i++)
    {
        stateIndexes[data.States[i].Name] = i;
    }

    for (int i = 0; i < data.States.Length; i++)
    {
        LexerConfig.State state = data.States[i];
        itw.WriteLine("#region \"{0}({1})\"", state.Name, i);
        itw.WriteLine("case {0}:", i);
        itw.Indent++;
        itw.WriteLine("switch(input)");
        itw.WriteLine("{");
        itw.Indent++;
        foreach (LexerConfig.State.Case scase in state.Cases)
        {
            EmitSwitchCase(itw, data, scase, stateIndexes);
        }
        itw.Indent--;
        itw.WriteLine("}");
        itw.WriteLine("break;");
        itw.Indent--;
        itw.WriteLine("#endregion");
    }

    itw.WriteLine("default:");
    itw.Indent++;
    // Single-argument WriteLine on purpose: the {0} must reach the generated file verbatim.
    itw.WriteLine("throw new TextDeserializationException(string.Format(\"Unknown state number {0}\", state), source, inpos);");
    itw.Indent--;
    itw.Indent--;
    itw.WriteLine("}");
    itw.WriteLine("inpos.Index++;");
    itw.WriteLine("inpos.Column++;");
    itw.Indent--;
    itw.WriteLine("}");
    itw.WriteLine("while(input != -1);");
    itw.Indent--;
    itw.WriteLine("}");
}

/// <summary>
/// Emits the labels and body of one case of the per-state <c>switch(input)</c>.
/// </summary>
/// <param name="itw">The writer receiving the generated code.</param>
/// <param name="data">The configuration supplying the enum and exception-method names.</param>
/// <param name="scase">The case to emit; a case without matches becomes the <c>default:</c> label.</param>
/// <param name="stateIndexes">Lookup from state name to state index.</param>
/// <exception cref="Exception">A <c>State</c> instruction names a state missing from <paramref name="stateIndexes"/>.</exception>
private void EmitSwitchCase(IndentedTextWriter itw, LexerConfig data, LexerConfig.State.Case scase, SortedDictionary<string, int> stateIndexes)
{
    if (scase.Matches.Length == 0)
    {
        itw.WriteLine("default:");
    }
    else
    {
        foreach (int m in scase.Matches)
        {
            itw.WriteLine("case {0}:", FancyChar(m));
        }
    }

    // Set once a throw has been emitted, so no trailing "break;" is written after it.
    bool dead = false;
    itw.Indent++;
    foreach (var instruction in scase.Instructions)
    {
        switch (instruction.Code)
        {
            case LexerConfig.State.Case.Instruction.InstructionCode.And:
                itw.WriteLine("code &= {0};", instruction.Arguments[0]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Clear:
                itw.WriteLine("buffer.Clear();");
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Code:
                itw.WriteLine("code = {0};", instruction.Arguments[0]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Die:
                itw.WriteLine("throw new TextDeserializationException({0}(state, input), source, inpos);", data.ExceptionMethod);
                dead = true;
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.FMA:
                itw.WriteLine("code = code * {0} + {1};", instruction.Arguments[0], instruction.Arguments[1]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.FSO:
                itw.WriteLine("code = (code << {0}) | {1};", instruction.Arguments[0], instruction.Arguments[1]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.InCode:
                itw.WriteLine("code = input;");
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.InPos:
                itw.WriteLine("pos = inpos;");
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Or:
                itw.WriteLine("code |= {0};", instruction.Arguments[0]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Push:
                if (instruction.Arguments[0] is string)
                {
                    // A one-character string is emitted as a char literal, anything longer as a string literal.
                    var arg = instruction.Arguments[0].ToString();
                    itw.WriteLine("buffer.Append({0});", arg.Length == 1 ? FancyChar(arg[0]) : FancyString(arg));
                }
                else
                {
                    itw.WriteLine("buffer.Append((char){0});", instruction.Arguments[0]);
                }
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.PushCode:
                itw.WriteLine("buffer.Append((char)code);");
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.PushInput:
                // With exactly one match the input is known, so the literal is emitted directly.
                if (scase.Matches.Length == 1)
                {
                    itw.WriteLine("buffer.Append({0});", FancyChar(scase.Matches[0]));
                }
                else
                {
                    itw.WriteLine("buffer.Append((char)input);");
                }
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.State:
                if (instruction.Arguments[0] is string)
                {
                    // Resolve a state name to its index without using exceptions for control flow.
                    // The thrown type and message match what callers previously received.
                    int stateIndex;
                    if (!stateIndexes.TryGetValue(instruction.Arguments[0].ToString(), out stateIndex))
                    {
                        throw new Exception(string.Format("Unknown state {0}", instruction.Arguments[0]));
                    }
                    itw.WriteLine("state = {0};", stateIndex);
                }
                else
                {
                    // A non-string argument is written through unchanged as the state value.
                    itw.WriteLine("state = {0};", instruction.Arguments[0]);
                }
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Xor:
                itw.WriteLine("code ^= {0};", instruction.Arguments[0]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.Yield:
                itw.WriteLine("yield return new Token<{0}>({0}.{1}, buffer.ToString(), pos);", data.Enum, instruction.Arguments[0]);
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.YieldInput:
                if (scase.Matches.Length == 1)
                {
                    itw.WriteLine("yield return new Token<{0}>({0}.{1}, {2}, inpos);", data.Enum, instruction.Arguments[0], FancyString(scase.Matches[0]));
                }
                else
                {
                    itw.WriteLine("yield return new Token<{0}>({0}.{1}, ((char)input).ToString(), inpos);", data.Enum, instruction.Arguments[0]);
                }
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.ToLower:
                itw.WriteLine("code = char.ToLowerInvariant((char)code);");
                break;
            case LexerConfig.State.Case.Instruction.InstructionCode.ToUpper:
                itw.WriteLine("code = char.ToUpperInvariant((char)code);");
                break;
        }
    }

    if (!dead)
    {
        itw.WriteLine("break;");
    }
    itw.Indent--;
}

/// <summary>
/// Emits the overload that takes only a <c>TextReader</c> and forwards to the tokenizer
/// method with <c>new TokenPosition(0, 1, 1)</c>.
/// </summary>
/// <param name="itw">The writer receiving the generated code.</param>
/// <param name="data">The configuration supplying the enum and method names.</param>
private void EmitReaderOnlyOverload(IndentedTextWriter itw, LexerConfig data)
{
    itw.WriteLine("public static IEnumerable<Token<{0}>> {1}(TextReader source)", data.Enum, data.Method);
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("return {0}(source, new TokenPosition(0, 1, 1));", data.Method);
    itw.Indent--;
    itw.WriteLine("}");
}

/// <summary>
/// Emits the private method that formats the error message for an unexpected input,
/// including a table of state names indexed by state number.
/// </summary>
/// <param name="itw">The writer receiving the generated code.</param>
/// <param name="data">The configuration supplying the exception-method name and the states.</param>
private void EmitExceptionMessageMethod(IndentedTextWriter itw, LexerConfig data)
{
    itw.WriteLine("private static string {0}(int state, int input)", data.ExceptionMethod);
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("string[] states = {");
    itw.Indent++;
    for (int i = 0; i < data.States.Length; i++)
    {
        itw.Write("\"{0}\"", data.States[i].Name);
        // Every entry except the last is followed by a comma.
        if (i != data.States.Length - 1)
        {
            itw.Write(",");
        }
        itw.WriteLine();
    }
    itw.Indent--;
    itw.WriteLine("};");
    itw.WriteLine("if (input == -1)");
    itw.WriteLine("{");
    itw.Indent++;
    // Single-argument WriteLine calls below: the placeholders belong to the generated code.
    itw.WriteLine("return string.Format(\"Unexpected EOF in {0}({1})\", states[state], state);");
    itw.Indent--;
    itw.WriteLine("}");
    itw.WriteLine("else");
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("try");
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("return string.Format(\"Unexpected character {0} ({1}) in {2}({3})\", (char)input, input, states[state], state);");
    itw.Indent--;
    itw.WriteLine("}");
    itw.WriteLine("catch(InvalidCastException)");
    itw.WriteLine("{");
    itw.Indent++;
    itw.WriteLine("return string.Format(\"Unexpected character {0} in {1}({2})\", input, states[state], state);");
    itw.Indent--;
    itw.WriteLine("}");
    itw.Indent--;
    itw.WriteLine("}");
    itw.Indent--;
    itw.WriteLine("}");
}