Beispiel #1
0
        /// <summary>
        /// Builds a lexer from three expressions (digits, word, whitespace), turns it
        /// into a DFA state table and writes generated C# source for a
        /// <c>RegexGenerated</c> class to <see cref="Console.Out"/>. The generated
        /// class receives a symbol table field, a lex method and a DFA table field.
        /// </summary>
        static void _RunCompiledLexCodeGen()
        {
            // id handed to the code generator for the error symbol; it must stay in
            // sync with the position of "#ERROR" in the symbol table below
            const int errorSymbolId = 3;

            // create our expressions
            var digits = CharFA<string>.Repeat(
                CharFA<string>.Set("0123456789"),
                1, -1
                , "Digits");

            var word = CharFA<string>.Repeat(
                CharFA<string>.Set(new CharRange[] { new CharRange('A', 'Z'), new CharRange('a', 'z') }),
                1, -1
                , "Word");

            var whitespace = CharFA<string>.Repeat(
                CharFA<string>.Set(" \t\r\n\v\f"),
                1, -1
                , "Whitespace");

            // initialize our lexer
            var lexer = CharFA<string>.ToLexer(digits, word, whitespace);

            // create the symbol table (the error symbol sits at index errorSymbolId)
            var symbolTable = new string[] { "Digits", "Word", "Whitespace", "#ERROR" };
            // create the DFA table we'll use to generate code
            var dfaTable = lexer.ToDfaStateTable(symbolTable);
            // create our new class
            var compClass = new CodeTypeDeclaration("RegexGenerated");

            compClass.TypeAttributes = System.Reflection.TypeAttributes.Class;
            compClass.Attributes     = MemberAttributes.Final | MemberAttributes.Static;
            // add the symbol table field - in production we'd set the name
            // to something more appropriate
            var symtblField = new CodeMemberField(typeof(string[]), "LexSymbols");

            symtblField.Attributes = MemberAttributes.Static | MemberAttributes.Public;
            // generate the symbol table init code
            symtblField.InitExpression = CharFA<string>.GenerateSymbolTableInitializer(symbolTable);

            compClass.Members.Add(symtblField);
            // generate and add the compiled lex method code - in production
            // we'd change the name of the returned method
            compClass.Members.Add(CharFA<string>.GenerateLexMethod(dfaTable, errorSymbolId));
            // add the DFA table field - in production we'd change the name
            var dfatblField = new CodeMemberField(typeof(CharDfaEntry[]), "LexDfaTable");

            dfatblField.Attributes = MemberAttributes.Static | MemberAttributes.Public;
            // generate the DFA state table init code
            dfatblField.InitExpression = CharFA<string>.GenerateDfaStateTableInitializer(dfaTable);

            compClass.Members.Add(dfatblField);
            // create the C# provider and generate the code.
            // we'd usually want to put the type in a namespace but we haven't here.
            // CodeDomProvider derives from Component and is disposable, so release
            // it deterministically once the source has been written.
            using (var prov = CodeDomProvider.CreateProvider("cs"))
            {
                prov.GenerateCodeFromType(compClass, Console.Out, new CodeGeneratorOptions());
            }
        }
Beispiel #2
0
        /// <summary>
        /// Scans a fixed sample string for "Word" matches four times, each time with
        /// a fresh parse context: via the NFA, via the DFA built from it, via the DFA
        /// state table, and via the <c>Match</c> method defined elsewhere in this
        /// class. Every match is written to the console with its position and value.
        /// </summary>
        static void _RunMatch()
        {
            string input = "foo123_ _bar";

            // letters A-Z and a-z, repeated (min 1, max -1), accepting as "Word"
            var letters = new CharRange[] { new CharRange('A', 'Z'), new CharRange('a', 'z') };
            var word = CharFA<string>.Repeat(CharFA<string>.Set(letters), 1, -1, "Word");

            // build both DFA forms up front, before any matching takes place
            var wordDfa = word.ToDfa();
            var wordDfaTable = word.ToDfaStateTable();

            var cursor = ParseContext.Create(input);
            Console.WriteLine("Matching words with an NFA:");
            for (CharFAMatch hit = word.Match(cursor); null != hit; hit = word.Match(cursor))
            {
                Console.WriteLine("Found match at {0}: {1}", hit.Position, hit.Value);
            }
            Console.WriteLine();

            // each pass gets its own context over the same input
            cursor = ParseContext.Create(input);
            Console.WriteLine("Matching words with a DFA:");
            for (CharFAMatch hit = wordDfa.MatchDfa(cursor); null != hit; hit = wordDfa.MatchDfa(cursor))
            {
                Console.WriteLine("Found match at {0}: {1}", hit.Position, hit.Value);
            }
            Console.WriteLine();

            cursor = ParseContext.Create(input);
            Console.WriteLine("Matching words with a DFA state table:");
            for (CharFAMatch hit = CharFA<string>.MatchDfa(wordDfaTable, cursor);
                 null != hit;
                 hit = CharFA<string>.MatchDfa(wordDfaTable, cursor))
            {
                Console.WriteLine("Found match at {0}: {1}", hit.Position, hit.Value);
            }
            Console.WriteLine();

            cursor = ParseContext.Create(input);
            Console.WriteLine("Matching words with a compiled DFA:");
            for (CharFAMatch hit = Match(cursor); null != hit; hit = Match(cursor))
            {
                Console.WriteLine("Found match at {0}: {1}", hit.Position, hit.Value);
            }
            Console.WriteLine();
        }
Beispiel #3
0
        /// <summary>
        /// Renders the figures used in the Code Project article
        /// https://www.codeproject.com/Articles/5251476/How-to-Build-a-Regex-Engine-in-Csharp
        /// Each automaton is built and immediately rendered to a .jpg file three
        /// directories above the working directory.
        /// </summary>
        static void _BuildArticleImages()
        {
            // NOTE: construction and rendering are kept strictly interleaved in the
            // original order; several builders take an existing automaton as input.

            // literal and the variations derived from it
            var abc = CharFA<string>.Literal("ABC", "Accept");
            abc.RenderToFile(@"..\..\..\literal.jpg");

            CharFA<string>.CaseInsensitive(abc, "Accept").RenderToFile(@"..\..\..\literal_ci.jpg");

            CharFA<string>.Optional(abc, "Accept").RenderToFile(@"..\..\..\optional.jpg");

            var def = CharFA<string>.Literal("DEF");

            CharFA<string>.Or(new CharFA<string>[] { abc, def }, "Accept").RenderToFile(@"..\..\..\or.jpg");

            CharFA<string>.Set("ABC", "Accept").RenderToFile(@"..\..\..\set.jpg");

            CharFA<string>.Repeat(abc, 1, -1, "Accept").RenderToFile(@"..\..\..\repeat.jpg");

            CharFA<string>.Concat(new CharFA<string>[] { abc, def }, "Accept").RenderToFile(@"..\..\..\concat.jpg");

            // foo|bar, first as built and then after Reduce()
            var foo = CharFA<string>.Literal("foo");
            var bar = CharFA<string>.Literal("bar");
            var fooOrBar = CharFA<string>.Or(new CharFA<string>[] { foo, bar }, "Accept");
            fooOrBar.RenderToFile(@"..\..\..\foobar_nfa.jpg");

            fooOrBar.Reduce().RenderToFile(@"..\..\..\foobar.jpg");

            // the same expression repeated, as built and after Reduce()
            var fooOrBarLoop = CharFA<string>.Repeat(fooOrBar, 1, -1, "Accept");
            fooOrBarLoop.RenderToFile(@"..\..\..\foobar_loop_nfa.jpg");

            fooOrBarLoop.Reduce().RenderToFile(@"..\..\..\foobar_loop.jpg");

            // the three-symbol lexer
            var digits = CharFA<string>.Repeat(CharFA<string>.Set("0123456789"), 1, -1, "Digits");

            var letterRanges = new CharRange[] { new CharRange('A', 'Z'), new CharRange('a', 'z') };
            var word = CharFA<string>.Repeat(CharFA<string>.Set(letterRanges), 1, -1, "Word");

            var whitespace = CharFA<string>.Repeat(CharFA<string>.Set(" \t\r\n\v\f"), 1, -1, "Whitespace");

            var lexer = CharFA<string>.ToLexer(digits, word, whitespace);
            lexer.RenderToFile(@"..\..\..\lexer.jpg");

            // the DFA is rendered twice: once with the source NFA attached through
            // the graph options, once with default options
            var graphOptions = new CharFA<string>.DotGraphOptions { DebugSourceNfa = lexer };
            var lexerDfa = lexer.ToDfa();
            lexerDfa.RenderToFile(@"..\..\..\dlexer.jpg", graphOptions);
            lexerDfa.RenderToFile(@"..\..\..\dlexer2.jpg");

            // an automaton produced from a parsed regular expression
            RegexExpression.Parse("(ABC|DEF)+").ToFA("Accept").RenderToFile(@"..\..\..\ABCorDEFloop.jpg");
        }
Beispiel #4
0
        /// <summary>
        /// Lexes a fixed sample string four times and writes each token to the
        /// console as "symbol: captured text": with the NFA lexer, with the DFA
        /// built from it, with the DFA state table, and with the <c>Lex</c> method
        /// defined elsewhere in this class. The last two return symbol ids, which
        /// are mapped back to names through the symbol table.
        /// </summary>
        static void _RunLexer()
        {
            // id passed to the table-driven lexer as its error symbol; it is the
            // index of "#ERROR" in the symbol table below
            const int errorId = 3;
            var input = "foo123_ _bar";

            var digits = CharFA<string>.Repeat(CharFA<string>.Set("0123456789"), 1, -1, "Digits");

            var letterRanges = new CharRange[] { new CharRange('A', 'Z'), new CharRange('a', 'z') };
            var word = CharFA<string>.Repeat(CharFA<string>.Set(letterRanges), 1, -1, "Word");

            var whitespace = CharFA<string>.Repeat(CharFA<string>.Set(" \t\r\n\v\f"), 1, -1, "Whitespace");

            var nfaLexer = CharFA<string>.ToLexer(digits, word, whitespace);

            var dfaLexer = nfaLexer.ToDfa();
            dfaLexer.TrimDuplicates();

            // the symbol table maps the ids used by the DFA state table back to
            // strings; it has to be supplied when the table is built
            var symbols = new string[] { "Digits", "Word", "Whitespace", "#ERROR" };
            var stateTable = nfaLexer.ToDfaStateTable(symbols);

            // --- pass 1: NFA ---
            Console.WriteLine("Lex using the NFA");
            var cursor = ParseContext.Create(input);
            // -1 signals end of input
            while (cursor.Current != -1)
            {
                // drop the previous token's text, otherwise captures keep accumulating
                cursor.ClearCapture();
                var symbol = nfaLexer.Lex(cursor, "#ERROR");
                Console.WriteLine("{0}: {1}", symbol, cursor.GetCapture());
            }
            Console.WriteLine();

            // --- pass 2: DFA ---
            Console.WriteLine("Lex using the DFA");
            // the previous context has been consumed to the end, so start a new one
            cursor = ParseContext.Create(input);
            while (cursor.Current != -1)
            {
                cursor.ClearCapture();
                // LexDfa mirrors Lex but is meant for DFA traversal only. Per the
                // original author's note: do not call it on an NFA - it will not
                // work, and it will not raise an error either.
                var symbol = dfaLexer.LexDfa(cursor, "#ERROR");
                Console.WriteLine("{0}: {1}", symbol, cursor.GetCapture());
            }
            Console.WriteLine();

            // --- pass 3: DFA state table ---
            Console.WriteLine("Lex using the DFA state table");
            cursor = ParseContext.Create(input);
            while (cursor.Current != -1)
            {
                cursor.ClearCapture();
                // static overload: takes the state table as its first argument and
                // yields a symbol id rather than the symbol itself
                var symbolId = CharFA<string>.LexDfa(stateTable, cursor, errorId);

                // translate the id back into its name before printing
                Console.WriteLine("{0}: {1}", symbols[symbolId], cursor.GetCapture());
            }
            Console.WriteLine();

            // --- pass 4: compiled lex method ---
            Console.WriteLine("Lex using our compiled lex method");
            cursor = ParseContext.Create(input);
            while (cursor.Current != -1)
            {
                cursor.ClearCapture();
                // like the table-driven variant, the compiled method yields a symbol id
                var symbolId = Lex(cursor);
                Console.WriteLine("{0}: {1}", symbols[symbolId], cursor.GetCapture());
            }
            Console.WriteLine();
        }