Example #1
0
File: Lexer.cs Project: Daxea/Lex
 /// <summary>
 /// Creates a lexer positioned on the first character of <paramref name="text"/>.
 /// </summary>
 /// <param name="text">The text to tokenize; must contain at least one character.</param>
 /// <param name="lexerConfig">The configuration stored for this lexer.</param>
 /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
 /// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
 public Lexer(string text, LexerConfig lexerConfig)
 {
     // Reading text[0] below needs a non-null, non-empty string; fail with a
     // descriptive argument exception before any state is assigned.
     if (text == null)
         throw new ArgumentNullException("text");
     if (text.Length == 0)
         throw new ArgumentException("The text to lex must not be empty.", "text");

     _text        = text;
     _position    = 0;
     _currentChar = text[0];
     _errors      = new List <Exception>();
     _lexerConfig = lexerConfig;

     // Runs last so that every field assigned above is already in place.
     PrepareTokenMappings();
 }
Example #2
0
File: Lexer.cs Project: Daxea/Lex
 /// <summary>
 /// Creates a lexer positioned on the first character of <paramref name="text"/>,
 /// using the configuration returned by <c>LexerConfig.Default(mappings)</c>.
 /// </summary>
 /// <param name="text">The text to tokenize; must contain at least one character.</param>
 /// <param name="mappings">The token mappings handed to <c>LexerConfig.Default</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
 /// <exception cref="ArgumentException"><paramref name="text"/> is empty.</exception>
 public Lexer(string text, List <TokenMapping> mappings)
 {
     // Reading text[0] below needs a non-null, non-empty string; fail with a
     // descriptive argument exception before any state is assigned.
     if (text == null)
         throw new ArgumentNullException("text");
     if (text.Length == 0)
         throw new ArgumentException("The text to lex must not be empty.", "text");

     _text        = text;
     _position    = 0;
     _currentChar = text[0];
     _errors      = new List <Exception>();
     _lexerConfig = LexerConfig.Default(mappings);

     // Runs last so that every field assigned above is already in place.
     PrepareTokenMappings();
 }
Example #3
0
        /// <summary>
        /// Generates a config from a map
        /// </summary>
        /// <remarks>
        /// Reads the "Namespace", "Name", "Enum", "Method" and "ExceptionMethod" strings and the
        /// optional "Partial" flag (default <c>false</c>) from <paramref name="data"/>, then passes
        /// every entry of its "States" dictionary to <c>ProcessState</c> together with a
        /// case-insensitive table of instruction names.
        /// </remarks>
        /// <param name="data">The map</param>
        /// <returns>The config</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null</exception>
        public LexerConfig GenerateConfig(ConfigurationDictionary data)
        {
            if (data == null)
                throw new ArgumentNullException("data");

            LexerConfig config = new LexerConfig();

            var stringData = data.Caster<string>();
            config.Namespace = stringData["Namespace"];
            config.Name = stringData["Name"];
            config.Enum = stringData["Enum"];
            config.Method = stringData["Method"];
            config.ExceptionMethod = stringData["ExceptionMethod"];
            config.Partial = data.ValueAs<bool>("Partial", false);

            // Instruction names are fixed keywords, not user-facing text, so they are matched
            // with an ordinal comparer: a culture-sensitive one would make keyword lookup
            // depend on the culture of the machine running the generator.
            var ic = new SortedDictionary<string, LexerConfig.State.Case.Instruction.InstructionCode>(StringComparer.OrdinalIgnoreCase);

            ic["Clear"] = LexerConfig.State.Case.Instruction.InstructionCode.Clear;
            ic["InPos"] = LexerConfig.State.Case.Instruction.InstructionCode.InPos;
            ic["PushInput"] = LexerConfig.State.Case.Instruction.InstructionCode.PushInput;
            ic["Push"] = LexerConfig.State.Case.Instruction.InstructionCode.Push;
            ic["PushCode"] = LexerConfig.State.Case.Instruction.InstructionCode.PushCode;
            ic["InCode"] = LexerConfig.State.Case.Instruction.InstructionCode.InCode;
            ic["FMA"] = LexerConfig.State.Case.Instruction.InstructionCode.FMA;
            ic["FSO"] = LexerConfig.State.Case.Instruction.InstructionCode.FSO;
            ic["Code"] = LexerConfig.State.Case.Instruction.InstructionCode.Code;
            ic["Xor"] = LexerConfig.State.Case.Instruction.InstructionCode.Xor;
            ic["Or"] = LexerConfig.State.Case.Instruction.InstructionCode.Or;
            ic["And"] = LexerConfig.State.Case.Instruction.InstructionCode.And;
            ic["State"] = LexerConfig.State.Case.Instruction.InstructionCode.State;
            ic["Die"] = LexerConfig.State.Case.Instruction.InstructionCode.Die;
            ic["Yield"] = LexerConfig.State.Case.Instruction.InstructionCode.Yield;
            ic["YieldInput"] = LexerConfig.State.Case.Instruction.InstructionCode.YieldInput;
            ic["ToLower"] = LexerConfig.State.Case.Instruction.InstructionCode.ToLower;
            ic["ToUpper"] = LexerConfig.State.Case.Instruction.InstructionCode.ToUpper;

            // ProcessState receives the shared list and the instruction table for each entry;
            // the list is snapshotted into the config once all entries have been processed.
            var states = new List<LexerConfig.State>();

            foreach (var state in data.ValueAs<ConfigurationDictionary>("States"))
            {
                ProcessState(state, states, ic);
            }

            config.States = states.ToArray();

            return config;
        }
 /// <summary>
 /// Creates a lexer that stores the given configuration.
 /// </summary>
 /// <param name="config">The configuration to store; it is assigned as-is, without validation.</param>
 public Lexer(LexerConfig config)
 {
     this.config = config;
 }
Example #5
0
        /// <summary>
        /// Generates code from a configuration
        /// </summary>
        /// <remarks>
        /// Writes a complete C# source file to <paramref name="output"/>: the using directives,
        /// the namespace and class named by the configuration, an iterator method that runs a
        /// <c>switch(state)</c> / <c>switch(input)</c> state machine over a <c>TextReader</c>,
        /// an overload that starts at <c>new TokenPosition(0, 1, 1)</c>, and a private helper
        /// that formats the "unexpected input" message.
        /// </remarks>
        /// <param name="data">The configuration</param>
        /// <param name="output">The output</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="output"/> is null</exception>
        /// <exception cref="InvalidOperationException">A State instruction names a state that is not declared in <paramref name="data"/></exception>
        public void GenerateLexer(LexerConfig data, TextWriter output)
        {
            if (data == null)
                throw new ArgumentNullException("data");
            if (output == null)
                throw new ArgumentNullException("output");

            IndentedTextWriter itw = new IndentedTextWriter(output);
            itw.Indent = 0;

            string[] namespaces = new[] { "System", "System.IO", "System.Text",
                "System.Collections.Generic", "System.Globalization", "KaoriStudio.Core.Text.Parsing", "KaoriStudio.Core.Helpers" };

            foreach (var ns in namespaces)
            {
                itw.WriteLine("using {0};", ns);
            }

            itw.WriteLine("namespace {0}", data.Namespace);
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine(data.Partial ? "public partial class {0}" : "public class {0}", data.Name);
            itw.WriteLine("{");
            itw.Indent++;

            WriteTokenizerMethod(itw, data);
            WriteDefaultPositionOverload(itw, data);
            WriteExceptionMessageMethod(itw, data);

            // Close the class.
            itw.Indent--;
            itw.WriteLine("}");

            // Close the namespace.
            itw.Indent--;
            itw.WriteLine("}");
        }

        /// <summary>
        /// Emits the iterator method that reads the source one character at a time and
        /// dispatches on the current state and the input character.
        /// </summary>
        private void WriteTokenizerMethod(IndentedTextWriter itw, LexerConfig data)
        {
            itw.WriteLine("public static IEnumerable<Token<{0}>> {1}(TextReader source, TokenPosition inpos)", data.Enum, data.Method);
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("int state = 0;");
            itw.WriteLine("StringBuilder buffer = new StringBuilder();");
            itw.WriteLine("TokenPosition pos = default(TokenPosition);");
            itw.WriteLine("int code = 0;");
            itw.WriteLine("int input;");

            itw.WriteLine("do");
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("input = source.Read();");
            // NOTE(review): the emitted loop also executes "inpos.Column++" after this branch,
            // so the character following '\n' is reported at column 2 - confirm this is intended.
            itw.WriteLine("if (input == '\\n')");
            itw.WriteLine("{");
            itw.Indent++;
            itw.WriteLine("inpos.Column = 1;");
            itw.WriteLine("inpos.Argument++;");
            itw.Indent--;
            itw.WriteLine("}");

            itw.WriteLine("switch(state)");
            itw.WriteLine("{");
            itw.Indent++;

            // State instructions may refer to a state by name, so map every name to its
            // index before any state body is emitted. A repeated name keeps the last index.
            SortedDictionary<string, int> stateIndexes = new SortedDictionary<string, int>();
            for (int i = 0; i < data.States.Length; i++)
                stateIndexes[data.States[i].Name] = i;

            for (int i = 0; i < data.States.Length; i++)
                WriteStateBlock(itw, data, data.States[i], i, stateIndexes);

            itw.WriteLine("default:");
            itw.Indent++;
            itw.WriteLine("throw new TextDeserializationException(string.Format(\"Unknown state number {0}\", state), source, inpos);");
            itw.Indent--;
            itw.Indent--;
            itw.WriteLine("}");

            itw.WriteLine("inpos.Index++;");
            itw.WriteLine("inpos.Column++;");
            itw.Indent--;
            itw.WriteLine("}");
            // Read() returns -1 at end of input; that value is still dispatched once
            // through the state switch before the loop ends.
            itw.WriteLine("while(input != -1);");
            itw.Indent--;
            itw.WriteLine("}");
        }

        /// <summary>
        /// Emits the <c>case</c> for one state: a region marker and an inner
        /// <c>switch(input)</c> holding that state's cases.
        /// </summary>
        private void WriteStateBlock(IndentedTextWriter itw, LexerConfig data, LexerConfig.State state, int index, SortedDictionary<string, int> stateIndexes)
        {
            itw.WriteLine("#region \"{0}({1})\"", state.Name, index);
            itw.WriteLine("case {0}:", index);
            itw.Indent++;
            itw.WriteLine("switch(input)");
            itw.WriteLine("{");
            itw.Indent++;

            foreach (LexerConfig.State.Case scase in state.Cases)
                WriteCase(itw, data, scase, stateIndexes);

            itw.Indent--;
            itw.WriteLine("}");
            itw.WriteLine("break;");
            itw.Indent--;
            itw.WriteLine("#endregion");
        }

        /// <summary>
        /// Emits the labels and the instruction statements of a single case.
        /// A case without matches becomes the <c>default:</c> label.
        /// </summary>
        private void WriteCase(IndentedTextWriter itw, LexerConfig data, LexerConfig.State.Case scase, SortedDictionary<string, int> stateIndexes)
        {
            if (scase.Matches.Length == 0)
            {
                itw.WriteLine("default:");
            }
            else
            {
                foreach (int m in scase.Matches)
                    itw.WriteLine("case {0}:", FancyChar(m));
            }

            // Set once a throw has been emitted, so no trailing "break;" is written after it.
            bool dead = false;
            itw.Indent++;

            foreach (var instruction in scase.Instructions)
            {
                switch (instruction.Code)
                {
                    case LexerConfig.State.Case.Instruction.InstructionCode.And:
                        itw.WriteLine("code &= {0};", instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Clear:
                        itw.WriteLine("buffer.Clear();");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Code:
                        itw.WriteLine("code = {0};", instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Die:
                        itw.WriteLine("throw new TextDeserializationException({0}(state, input), source, inpos);", data.ExceptionMethod);
                        dead = true;
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.FMA:
                        itw.WriteLine("code = code * {0} + {1};", instruction.Arguments[0], instruction.Arguments[1]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.FSO:
                        itw.WriteLine("code = (code << {0}) | {1};", instruction.Arguments[0], instruction.Arguments[1]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.InCode:
                        itw.WriteLine("code = input;");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.InPos:
                        itw.WriteLine("pos = inpos;");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Or:
                        itw.WriteLine("code |= {0};", instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Push:
                        if (instruction.Arguments[0] is string)
                        {
                            var arg = instruction.Arguments[0].ToString();
                            itw.WriteLine("buffer.Append({0});", arg.Length == 1 ? FancyChar(arg[0]) : FancyString(arg));
                        }
                        else
                            itw.WriteLine("buffer.Append((char){0});", instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.PushCode:
                        itw.WriteLine("buffer.Append((char)code);");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.PushInput:
                        // With exactly one match the input is known, so a literal is emitted.
                        if (scase.Matches.Length == 1)
                            itw.WriteLine("buffer.Append({0});", FancyChar(scase.Matches[0]));
                        else
                            itw.WriteLine("buffer.Append((char)input);");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.State:
                        {
                            // A string argument is a state name and is resolved to its index;
                            // any other argument is written out unchanged.
                            object target = instruction.Arguments[0];
                            string targetName = target as string;
                            if (targetName != null)
                            {
                                int targetIndex;
                                if (!stateIndexes.TryGetValue(targetName, out targetIndex))
                                    throw new InvalidOperationException(string.Format("Unknown state {0}", targetName));
                                target = targetIndex;
                            }
                            itw.WriteLine("state = {0};", target);
                        }
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Xor:
                        itw.WriteLine("code ^= {0};", instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.Yield:
                        itw.WriteLine("yield return new Token<{0}>({0}.{1}, buffer.ToString(), pos);", data.Enum, instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.YieldInput:
                        if (scase.Matches.Length == 1)
                            itw.WriteLine("yield return new Token<{0}>({0}.{1}, {2}, inpos);", data.Enum, instruction.Arguments[0], FancyString(scase.Matches[0]));
                        else
                            itw.WriteLine("yield return new Token<{0}>({0}.{1}, ((char)input).ToString(), inpos);", data.Enum, instruction.Arguments[0]);
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.ToLower:
                        itw.WriteLine("code = char.ToLowerInvariant((char)code);");
                        break;
                    case LexerConfig.State.Case.Instruction.InstructionCode.ToUpper:
                        itw.WriteLine("code = char.ToUpperInvariant((char)code);");
                        break;
                }
            }

            if (!dead)
                itw.WriteLine("break;");
            itw.Indent--;
        }

        /// <summary>
        /// Emits the overload that takes only the reader and forwards to the main
        /// method with <c>new TokenPosition(0, 1, 1)</c>.
        /// </summary>
        private void WriteDefaultPositionOverload(IndentedTextWriter itw, LexerConfig data)
        {
            itw.WriteLine("public static IEnumerable<Token<{0}>> {1}(TextReader source)", data.Enum, data.Method);
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("return {0}(source, new TokenPosition(0, 1, 1));", data.Method);

            itw.Indent--;
            itw.WriteLine("}");
        }

        /// <summary>
        /// Emits the private helper that turns a state number and an input value into
        /// the "Unexpected EOF" / "Unexpected character" message.
        /// </summary>
        private void WriteExceptionMessageMethod(IndentedTextWriter itw, LexerConfig data)
        {
            itw.WriteLine("private static string {0}(int state, int input)", data.ExceptionMethod);
            itw.WriteLine("{");
            itw.Indent++;

            // Table of state names, indexed by state number, used in the messages below.
            itw.WriteLine("string[] states = {");
            itw.Indent++;

            for (int i = 0; i < data.States.Length; i++)
            {
                itw.Write("\"{0}\"", data.States[i].Name);
                if (i != data.States.Length - 1)
                    itw.Write(",");
                itw.WriteLine();
            }

            itw.Indent--;
            itw.WriteLine("};");

            itw.WriteLine("if (input == -1)");
            itw.WriteLine("{");
            itw.Indent++;
            itw.WriteLine("return string.Format(\"Unexpected EOF in {0}({1})\", states[state], state);");
            itw.Indent--;
            itw.WriteLine("}");

            itw.WriteLine("else");
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("try");
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("return string.Format(\"Unexpected character {0} ({1}) in {2}({3})\", (char)input, input, states[state], state);");

            itw.Indent--;
            itw.WriteLine("}");
            itw.WriteLine("catch(InvalidCastException)");
            itw.WriteLine("{");
            itw.Indent++;

            itw.WriteLine("return string.Format(\"Unexpected character {0} in {1}({2})\", input, states[state], state);");

            itw.Indent--;
            itw.WriteLine("}");

            // Close the else branch.
            itw.Indent--;
            itw.WriteLine("}");

            // Close the method.
            itw.Indent--;
            itw.WriteLine("}");
        }