예제 #1
0
 /// <summary>
 /// Creates a token enumerator that runs the specified lexer over the specified characters
 /// </summary>
 /// <param name="lexer">The FA used to match tokens</param>
 /// <param name="string">The characters to tokenize</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="lexer"/> or <paramref name="string"/> is null</exception>
 public TokenEnumerator(FA lexer, IEnumerable <char> @string)
 {
     // Validate up front so a bad argument is reported by name instead of as a
     // NullReferenceException from one of the calls below.
     if (null == lexer)
     {
         throw new System.ArgumentNullException("lexer");
     }
     if (null == @string)
     {
         throw new System.ArgumentNullException("string");
     }
     _lexer         = lexer;
     _input         = @string.GetEnumerator();
     _buffer        = new StringBuilder();
     // computed once here and stored for later use
     _initialStates = _lexer.FillEpsilonClosure();
     Reset();             // Reset is used here to initialize the rest of the values
 }
예제 #2
0
        /// <summary>
        /// Builds an FA that matches the specified FA expression one or more times in a row
        /// </summary>
        /// <param name="expr">The expression to repeat. The work is done on the result of expr.Clone()</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>A new FA that matches the specified FA one or more times</returns>
        public static FA Repeat(FA expr, string accept = "")
        {
            var machine   = expr.Clone();
            var accepting = machine.FirstAcceptingState;

            // An epsilon edge from the accepting state back to the start lets the
            // machine run through the expression again after each match.
            accepting.EpsilonTransitions.Add(machine);
            accepting.AcceptingSymbol = accept;
            return machine;
        }
예제 #3
0
        /// <summary>
        /// Builds an FA that matches either the specified FA expression or nothing at all
        /// </summary>
        /// <param name="expr">The expression to make optional. The work is done on the result of expr.Clone()</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>A new FA that will match the specified expression or empty</returns>
        public static FA Optional(FA expr, string accept = "")
        {
            var machine = expr.Clone();
            var last    = machine.FirstAcceptingState;

            last.AcceptingSymbol = accept;
            // An epsilon edge from the start directly to the accepting state is
            // what allows the expression to be skipped.
            machine.EpsilonTransitions.Add(last);
            return machine;
        }
예제 #4
0
        /// <summary>
        /// Creates an FA that will match any one of a set of characters
        /// </summary>
        /// <param name="set">The set of characters that will be matched. Repeated characters are ignored</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>An FA that will match the specified set</returns>
        public static FA Set(IEnumerable <char> set, string accept = "")
        {
            var result = new FA();
            var final  = new FA();
            var seen   = new HashSet <char>();

            final.AcceptingSymbol = accept;
            foreach (char ch in set)
            {
                // Add each character only the first time it is seen. The input is an
                // arbitrary sequence (e.g. a string) and may contain repeats;
                // NOTE(review): Transitions.Add is presumably dictionary-like and would
                // throw on a repeated key - either way a second edge on the same
                // character to the same state adds nothing.
                if (seen.Add(ch))
                {
                    result.Transitions.Add(ch, final);
                }
            }
            return(result);
        }
예제 #5
0
        /// <summary>
        /// Creates an FA that matches a literal string
        /// </summary>
        /// <param name="string">The string to match. An empty string yields a machine whose start state accepts</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>A new FA machine that will match this literal</returns>
        public static FA Literal(IEnumerable <char> @string, string accept = "")
        {
            var result  = new FA();
            var current = result;

            foreach (char ch in @string)
            {
                // a state with an outgoing character is not the end of the literal
                current.AcceptingSymbol = null;
                var fa = new FA();
                current.Transitions.Add(ch, fa);
                current = fa;
            }
            // Mark the last state reached as accepting. Doing this after the loop
            // (instead of on each newly created state) also covers the empty literal,
            // where the loop never runs and the start state itself must accept;
            // previously that case returned a machine with no accepting state.
            current.AcceptingSymbol = accept;
            return(result);
        }
예제 #6
0
        /// <summary>
        /// Creates a new FA that matches any one of the FA expressions passed
        /// </summary>
        /// <param name="exprs">The expressions to match. Each is cloned; the originals are left untouched by this method</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>A new FA that will match the union of the FA expressions passed</returns>
        public static FA Or(IEnumerable <FA> exprs, string accept = "")
        {
            var result = new FA();
            var final  = new FA();

            final.AcceptingSymbol = accept;
            foreach (var fa in exprs)
            {
                // Work on a clone only: the previous code added a self epsilon loop to
                // the caller's FA and cleared the caller's accepting symbol, while the
                // start state of the result was never connected to anything.
                var nfa  = fa.Clone();
                var nffa = nfa.FirstAcceptingState;

                // start --epsilon--> alternative
                result.EpsilonTransitions.Add(nfa);
                // alternative's accepting state --epsilon--> shared final state, which
                // becomes the only state carrying the accept symbol
                nffa.AcceptingSymbol = null;
                nffa.EpsilonTransitions.Add(final);
            }
            return(result);
        }
예제 #7
0
        /// <summary>
        /// Demo driver: reads a CFG from the file named by the first argument, prints its rules,
        /// terminals, PREDICT and FOLLOWS sets, builds a parse table from them, builds a small
        /// lexer, and parses the fixed text "(1+3)*2", writing the result to the console.
        /// </summary>
        /// <param name="args">Command line arguments; args[0] is passed to CfgDocument.ReadFrom</param>
        static void Demo(string[] args)
        {
            // nothing to do without an input grammar
            if (0 == args.Length)
            {
                Console.Error.WriteLine("Must specify input CFG");
                return;
            }
            var cfg = CfgDocument.ReadFrom(args[0]);

            Console.WriteLine(cfg.ToString());
            Console.WriteLine();
            // not-necessary but faster access since we're not modifying:

            cfg.RebuildCache();
            Console.WriteLine("See: http://hackingoff.com/compilers/ll-1-parser-generator");
            Console.WriteLine();
            Console.WriteLine("CFG has {0} rules composed of {1} non-terminals and {2} terminals for a total of {3} symbols", cfg.Rules.Count, cfg.FillNonTerminals().Count, cfg.FillTerminals().Count, cfg.FillSymbols().Count);
            Console.WriteLine();

            // list the terminals on one line, space separated
            Console.Write("Terminals:");
            foreach (var t in cfg.FillTerminals())
            {
                Console.Write(" ");
                Console.Write(t);
            }
            Console.WriteLine();
            Console.WriteLine();

            // compute the various aspects of the CFG
            var predict = cfg.FillPredict();
            // var firsts = cfg.FillFirsts(); // we don't need this because we have predict
            var follows = cfg.FillFollows();

            // enum some stuff
            // for every non-terminal: its rules, its PREDICT entries, and its FOLLOWS entries
            foreach (var nt in cfg.FillNonTerminals())
            {
                Console.WriteLine(nt + " has the following rules:");
                foreach (var ntr in cfg.FillNonTerminalRules(nt))
                {
                    Console.Write("\t");
                    Console.WriteLine(ntr);
                }
                Console.WriteLine();
                Console.WriteLine(nt + " has the following PREDICT:");
                foreach (var t in predict[nt])
                {
                    Console.Write("\t");
                    // a PREDICT entry with a null symbol is shown as <empty>
                    Console.WriteLine((t.Symbol ?? "<empty>") + " - " + t.Rule);
                }
                Console.WriteLine();
                // PREDICT makes this redundant
                //Console.WriteLine(nt + " has the following FIRSTS:");
                //foreach (var t in firsts[nt])
                //{
                //	Console.Write("\t");
                //	Console.WriteLine(t);
                //}
                //Console.WriteLine();
                Console.WriteLine(nt + " has the following FOLLOWS:");
                foreach (var t in follows[nt])
                {
                    Console.Write("\t");
                    Console.WriteLine(t);
                }
                Console.WriteLine();
            }

            // now lets parse some stuff

            Console.WriteLine("Building simple parse table");

            // the parse table is simply nested dictionaries where each outer key is a non-terminal
            // and the inner key is each terminal, where they map to a single rule.
            // lookups during parse are basically rule=parseTable[<topOfStack>][<currentToken>]
            var parseTable = new Dictionary <string, Dictionary <string, CfgRule> >();

            foreach (var nt in cfg.FillNonTerminals())
            {
                var d = new Dictionary <string, CfgRule>();
                parseTable.Add(nt, d);
                foreach (var p in predict[nt])
                {
                    if (null != p.Symbol)
                    {
                        // entry with a concrete symbol: that symbol selects the rule.
                        // If the slot is already taken, report it and keep the earlier rule.
                        CfgRule or;
                        if (d.TryGetValue(p.Symbol, out or))
                        {
                            Console.Error.WriteLine("First-first conflict between " + p.Rule + " and " + or);
                        }
                        else
                        {
                            d.Add(p.Symbol, p.Rule);
                        }
                    }
                    else
                    {
                        // entry with a null symbol (printed as <empty> above): the rule is
                        // entered under every symbol in the FOLLOWS set of the non-terminal.
                        // Conflicts are again reported and the earlier rule kept.
                        foreach (var f in follows[nt])
                        {
                            CfgRule or;
                            if (d.TryGetValue(f, out or))
                            {
                                Console.Error.WriteLine("First-follows conflict between " + p.Rule + " and " + or);
                            }
                            else
                            {
                                d.Add(f, p.Rule);
                            }
                        }
                    }
                }
            }

            #region Build a Lexer for our parser - out of scope of the CFG project but necessary
            Console.WriteLine("Building simple lexer");
            // one machine per token kind; the second argument is the symbol each one accepts
            var fas = new FA[]
            {
                FA.Literal("+", "add"),
                FA.Literal("*", "mul"),
                FA.Literal("(", "lparen"),
                FA.Literal(")", "rparen"),
                FA.Repeat(FA.Set("0123456789"), "int")
            };

            // combine them: a fresh start state with an epsilon transition to each machine
            var lexer = new FA();
            for (var i = 0; i < fas.Length; i++)
            {
                lexer.EpsilonTransitions.Add(fas[i]);
            }
            Console.WriteLine();
            #endregion

            var text = "(1+3)*2";

            Console.WriteLine("Creating tokenizer");
            var tokenizer = new Tokenizer(lexer, text);
            Console.WriteLine("Creating parser");
            // "Expr" is passed as the third constructor argument
            // NOTE(review): presumably the start symbol of the grammar - confirm against LL1Parser
            var parser = new LL1Parser(parseTable, tokenizer, "Expr");
            Console.WriteLine();
            Console.WriteLine("Parsing " + text);
            Console.WriteLine(parser.ParseSubtree());
        }
예제 #8
0
 /// <summary>
 /// Builds an FA that matches the specified FA expression repeated zero or more times
 /// </summary>
 /// <param name="expr">The expression to repeat</param>
 /// <param name="accept">The symbol to accept</param>
 /// <returns>A new FA that matches the specified FA zero or more times</returns>
 public static FA Kleene(FA expr, string accept = "")
 {
     // "one or more" made optional gives "zero or more"
     var oneOrMore = Repeat(expr);

     return Optional(oneOrMore, accept);
 }
예제 #9
0
 /// <summary>
 /// Creates a tokenizer that pairs the specified lexer with the specified input characters
 /// </summary>
 /// <param name="lexer">The FA used to match tokens</param>
 /// <param name="input">The characters to tokenize</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="lexer"/> or <paramref name="input"/> is null</exception>
 public Tokenizer(FA lexer, IEnumerable <char> input)
 {
     // Both values are only stored here, so a null would otherwise go unnoticed
     // until the fields are eventually dereferenced somewhere else.
     if (null == lexer)
     {
         throw new System.ArgumentNullException("lexer");
     }
     if (null == input)
     {
         throw new System.ArgumentNullException("input");
     }
     _lexer = lexer;
     _input = input;
 }
예제 #10
0
        /// <summary>
        /// Lexes the next token from the input. This is where the work happens
        /// </summary>
        /// <returns>
        /// The symbol that was matched, "#ERROR" when the consumed text is not accepted, or "#EOS"
        /// once the end of the stream has already been reported. _state and _buffer are updated
        /// here, and the input is advanced through _MoveNextInput.
        /// </returns>
        string _Lex()
        {
            var active = _initialStates;

            _buffer.Clear();
            if (-2 == _state)                // end of stream
            {
                return "#EOS";
            }
            if (-1 == _state)                // initial
            {
                if (!_MoveNextInput())
                {
                    // there is no input at all: the answer depends only on the initial states
                    _state = -2;
                    string atStart = _GetAcceptingSymbol(active);
                    return atStart ?? "#ERROR";
                }
                _state = 0;                  // running
            }

            // Greedy matching: keep stepping the machine with FillMove for as long as the
            // current character leads somewhere, then report the symbol of the states we
            // ended up in, or "#ERROR" if none of them accepts.
            var next = FA.FillMove(active, _input.Current);
            while (0 != next.Count)
            {
                _buffer.Append(_input.Current);
                active = next;
                if (!_MoveNextInput())
                {
                    // ran out of input in the middle of a match
                    _state = -2;
                    string atEnd = _GetAcceptingSymbol(active);
                    return atEnd ?? "#ERROR";
                }
                next = FA.FillMove(active, _input.Current);
            }

            string matched = _GetAcceptingSymbol(active);
            if (null == matched)
            {
                // Error: include the offending character in the buffer and step past it
                _buffer.Append(_input.Current);
                if (!_MoveNextInput())
                {
                    _state = -2;
                }
                return "#ERROR";
            }
            return matched;
        }