/// <summary>
/// Creates an enumerator that runs the given lexer over a sequence of characters
/// </summary>
/// <param name="lexer">The FA used to match tokens</param>
/// <param name="string">The characters to tokenize</param>
public TokenEnumerator(FA lexer, IEnumerable<char> @string)
{
    _buffer = new StringBuilder();
    _input = @string.GetEnumerator();
    _lexer = lexer;
    // the epsilon closure of the lexer's start state is computed once and kept as the starting state set
    _initialStates = lexer.FillEpsilonClosure();
    // Reset() takes care of initializing the remaining values
    Reset();
}
/// <summary>
/// Builds an FA that matches one or more repetitions of the given expression
/// </summary>
/// <param name="expr">The expression to repeat. It is cloned, not modified</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that matches <paramref name="expr"/> one or more times</returns>
public static FA Repeat(FA expr, string accept = "")
{
    var repeated = expr.Clone();
    // loop from the accepting state back to the start so the expression can match again
    repeated.FirstAcceptingState.EpsilonTransitions.Add(repeated);
    repeated.FirstAcceptingState.AcceptingSymbol = accept;
    return repeated;
}
/// <summary>
/// Builds an FA that matches either the given expression or nothing at all
/// </summary>
/// <param name="expr">The expression to make optional. It is cloned, not modified</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that matches <paramref name="expr"/> or empty</returns>
public static FA Optional(FA expr, string accept = "")
{
    var optional = expr.Clone();
    var acceptState = optional.FirstAcceptingState;
    acceptState.AcceptingSymbol = accept;
    // an epsilon edge from the start straight to the accepting state lets the expression be skipped
    optional.EpsilonTransitions.Add(acceptState);
    return optional;
}
/// <summary>
/// Creates an FA that will match any one of a set of characters
/// </summary>
/// <param name="set">The characters that will be matched. Repeated characters are tolerated and only added once</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>An FA that will match the specified set</returns>
public static FA Set(IEnumerable<char> set, string accept = "")
{
    var result = new FA();
    var final = new FA();
    final.AcceptingSymbol = accept;
    // Every character maps to the same final state, so a repeated character carries no extra
    // information. Skip repeats so the same key never reaches Transitions.Add twice
    // (presumably a dictionary-style Add that rejects duplicate keys - confirm against FA).
    var seen = new HashSet<char>();
    foreach (char ch in set)
    {
        if (seen.Add(ch))
        {
            result.Transitions.Add(ch, final);
        }
    }
    return result;
}
/// <summary>
/// Builds an FA that matches a literal sequence of characters
/// </summary>
/// <param name="string">The characters to match, in order</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that will match this literal</returns>
public static FA Literal(IEnumerable<char> @string, string accept = "")
{
    var start = new FA();
    var tail = start;
    foreach (char ch in @string)
    {
        // the state being left is no longer the end of the literal
        tail.AcceptingSymbol = null;
        var next = new FA { AcceptingSymbol = accept };
        tail.Transitions.Add(ch, next);
        tail = next;
    }
    return start;
}
/// <summary>
/// Creates a new FA that matches any one of the FA expressions passed
/// </summary>
/// <param name="exprs">The expressions to match. Each one is cloned; the originals are left untouched</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that will match the union of the FA expressions passed</returns>
public static FA Or(IEnumerable<FA> exprs, string accept = "")
{
    var result = new FA();
    var final = new FA();
    final.AcceptingSymbol = accept;
    foreach (var fa in exprs)
    {
        // work on a copy so the caller's machine is never modified
        var nfa = fa.Clone();
        // the new start state can enter any alternative without consuming input
        result.EpsilonTransitions.Add(nfa);
        // capture the accepting state before clearing its symbol, since the lookup
        // presumably depends on the symbol being set
        var nffa = nfa.FirstAcceptingState;
        // every alternative funnels into the single shared final state...
        nffa.EpsilonTransitions.Add(final);
        // ...which becomes the only accepting state of the combined machine
        nffa.AcceptingSymbol = null;
    }
    return result;
}
/// <summary>
/// Demonstration driver: reads a CFG via CfgDocument.ReadFrom(args[0]), prints its rules together
/// with the PREDICT and FOLLOWS sets of every non-terminal, builds a parse table and a small
/// lexer, then parses a sample expression and prints the result.
/// </summary>
/// <param name="args">Command line arguments; the first one identifies the CFG to read</param>
static void Demo(string[] args)
{
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Must specify input CFG");
        return;
    }

    var cfg = CfgDocument.ReadFrom(args[0]);
    Console.WriteLine(cfg.ToString());
    Console.WriteLine();

    // not required, but gives faster access since the CFG is not modified from here on
    cfg.RebuildCache();

    Console.WriteLine("See: http://hackingoff.com/compilers/ll-1-parser-generator");
    Console.WriteLine();
    Console.WriteLine(
        "CFG has {0} rules composed of {1} non-terminals and {2} terminals for a total of {3} symbols",
        cfg.Rules.Count,
        cfg.FillNonTerminals().Count,
        cfg.FillTerminals().Count,
        cfg.FillSymbols().Count);
    Console.WriteLine();

    Console.Write("Terminals:");
    foreach (var terminal in cfg.FillTerminals())
    {
        Console.Write(" ");
        Console.Write(terminal);
    }
    Console.WriteLine();
    Console.WriteLine();

    // compute the sets needed below; FIRSTS is not computed because PREDICT makes it redundant
    var predict = cfg.FillPredict();
    var follows = cfg.FillFollows();

    // dump the rules, PREDICT and FOLLOWS of each non-terminal
    foreach (var nonTerminal in cfg.FillNonTerminals())
    {
        Console.WriteLine(nonTerminal + " has the following rules:");
        foreach (var rule in cfg.FillNonTerminalRules(nonTerminal))
        {
            Console.Write("\t");
            Console.WriteLine(rule);
        }
        Console.WriteLine();

        Console.WriteLine(nonTerminal + " has the following PREDICT:");
        foreach (var entry in predict[nonTerminal])
        {
            Console.Write("\t");
            Console.WriteLine((entry.Symbol ?? "<empty>") + " - " + entry.Rule);
        }
        Console.WriteLine();

        Console.WriteLine(nonTerminal + " has the following FOLLOWS:");
        foreach (var follow in follows[nonTerminal])
        {
            Console.Write("\t");
            Console.WriteLine(follow);
        }
        Console.WriteLine();
    }

    // now parse something
    Console.WriteLine("Building simple parse table");

    // The parse table is nested dictionaries: the outer key is a non-terminal, the inner key is
    // a terminal, and the value is the single rule to apply. A lookup during the parse is
    // basically rule = parseTable[<topOfStack>][<currentToken>]
    var parseTable = new Dictionary<string, Dictionary<string, CfgRule>>();
    foreach (var nonTerminal in cfg.FillNonTerminals())
    {
        var row = new Dictionary<string, CfgRule>();
        parseTable.Add(nonTerminal, row);
        foreach (var entry in predict[nonTerminal])
        {
            CfgRule existing;
            if (null == entry.Symbol)
            {
                // a PREDICT entry without a symbol is filed under each FOLLOWS terminal instead
                foreach (var follow in follows[nonTerminal])
                {
                    if (!row.TryGetValue(follow, out existing))
                    {
                        row.Add(follow, entry.Rule);
                    }
                    else
                    {
                        Console.Error.WriteLine("First-follows conflict between " + entry.Rule + " and " + existing);
                    }
                }
            }
            else if (!row.TryGetValue(entry.Symbol, out existing))
            {
                row.Add(entry.Symbol, entry.Rule);
            }
            else
            {
                Console.Error.WriteLine("First-first conflict between " + entry.Rule + " and " + existing);
            }
        }
    }

    // Build a lexer for the parser - out of scope of the CFG project but necessary
    Console.WriteLine("Building simple lexer");
    var fas = new FA[]
    {
        FA.Literal("+", "add"),
        FA.Literal("*", "mul"),
        FA.Literal("(", "lparen"),
        FA.Literal(")", "rparen"),
        FA.Repeat(FA.Set("0123456789"), "int")
    };
    var lexer = new FA();
    foreach (var fa in fas)
    {
        lexer.EpsilonTransitions.Add(fa);
    }
    Console.WriteLine();

    var input = "(1+3)*2";
    Console.WriteLine("Creating tokenizer");
    var tokenizer = new Tokenizer(lexer, input);
    Console.WriteLine("Creating parser");
    var parser = new LL1Parser(parseTable, tokenizer, "Expr");
    Console.WriteLine();
    Console.WriteLine("Parsing " + input);
    Console.WriteLine(parser.ParseSubtree());
}
/// <summary>
/// Builds an FA that matches zero or more repetitions of the given expression
/// </summary>
/// <param name="expr">The expression to repeat</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that matches <paramref name="expr"/> zero or more times</returns>
public static FA Kleene(FA expr, string accept = "")
{
    // "one or more" made optional is "zero or more"
    var oneOrMore = Repeat(expr);
    return Optional(oneOrMore, accept);
}
/// <summary>
/// Creates a tokenizer that pairs a lexer with the input it will be run over
/// </summary>
/// <param name="lexer">The FA used to match tokens</param>
/// <param name="input">The characters to tokenize</param>
public Tokenizer(FA lexer, IEnumerable<char> input)
{
    // both values are only stored here; no work is done until later
    _input = input;
    _lexer = lexer;
}
/// <summary>
/// Matches the next token from the input. This is where the work happens.
/// </summary>
/// <remarks>
/// Matching is greedy: the state set is advanced one character at a time until no further move
/// exists, and the symbol accepted by the last state set reached is reported. Members _state,
/// _buffer and _input are modified (and whatever _MoveNextInput updates).
/// </remarks>
/// <returns>
/// The symbol that was matched, "#ERROR" if the captured text is not accepted, or "#EOS" once the
/// end of the input has already been reported.
/// </returns>
string _Lex()
{
    var states = _initialStates;
    _buffer.Clear();
    switch (_state)
    {
        case -1: // initial: the input cursor has not been primed yet
            if (!_MoveNextInput())
            {
                // the input is empty: report whatever the initial states accept, if anything
                _state = -2;
                return _GetAcceptingSymbol(states) ?? "#ERROR";
            }
            _state = 0; // running
            break;
        case -2: // end of stream
            return "#EOS";
    }

    // FillMove runs one iteration of the NFA. Keep moving until no state can be reached on the
    // current character, capturing every character consumed along the way.
    while (true)
    {
        var next = FA.FillMove(states, _input.Current);
        if (0 == next.Count) // couldn't find any states
        {
            break;
        }
        _buffer.Append(_input.Current);
        states = next;
        if (!_MoveNextInput())
        {
            // end of stream: report what the states reached so far accept
            _state = -2;
            return _GetAcceptingSymbol(states) ?? "#ERROR";
        }
    }

    var acc = _GetAcceptingSymbol(states);
    // Only report a match if at least one character was consumed. If the initial states accept
    // by themselves, an unmatched character would otherwise yield a zero-length token without
    // advancing the input, and every later call would return that same empty token forever.
    if (null != acc && 0 < _buffer.Length)
    {
        return acc;
    }

    // error condition: capture the offending character and step past it so lexing can continue
    _buffer.Append(_input.Current);
    if (!_MoveNextInput())
    {
        _state = -2;
    }
    return "#ERROR";
}