/// <summary>
/// Builds an FA that matches one or more consecutive occurrences of the given FA expression
/// </summary>
/// <param name="expr">The expression to repeat; the work is done on a clone obtained from <c>expr.Clone()</c></param>
/// <param name="accept">The symbol assigned to the accepting state of the new machine</param>
/// <returns>A new FA that matches the specified FA one or more times</returns>
public static FA Repeat(FA expr, string accept = "")
{
    var machine = expr.Clone();
    var accepting = machine.FirstAcceptingState;

    // Loop from the accepting state back to the start so the expression can match again.
    accepting.EpsilonTransitions.Add(machine);
    accepting.AcceptingSymbol = accept;

    return machine;
}
/// <summary>
/// Creates an enumerator that lexes the given character sequence with the given FA lexer
/// </summary>
/// <param name="lexer">The FA used to match tokens</param>
/// <param name="string">The characters to tokenize</param>
public TokenEnumerator(FA lexer, IEnumerable<char> @string)
{
    this._buffer = new StringBuilder();
    this._lexer = lexer;
    this._input = @string.GetEnumerator();

    // The epsilon closure of the lexer's start state is computed once and kept as the initial state set.
    this._initialStates = lexer.FillEpsilonClosure();

    // Reset() doubles as the initializer for the remaining fields.
    Reset();
}
/// <summary>
/// Builds an FA that matches either the given FA expression or nothing at all
/// </summary>
/// <param name="expr">The expression to make optional; the work is done on a clone obtained from <c>expr.Clone()</c></param>
/// <param name="accept">The symbol assigned to the accepting state of the new machine</param>
/// <returns>A new FA that will match the specified expression or empty</returns>
public static FA Optional(FA expr, string accept = "")
{
    var machine = expr.Clone();
    var accepting = machine.FirstAcceptingState;

    // An epsilon edge from the start state straight to the accepting state
    // lets the machine accept without consuming any input.
    machine.EpsilonTransitions.Add(accepting);
    accepting.AcceptingSymbol = accept;

    return machine;
}
/// <summary>
/// Creates an FA that will match any one of a set of characters
/// </summary>
/// <param name="set">The set of characters that will be matched. Repeated characters are allowed and are treated as a single occurrence.</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>An FA that will match the specified set</returns>
public static FA Set(IEnumerable<char> set, string accept = "")
{
    var result = new FA();
    var final = new FA();
    final.AcceptingSymbol = accept;

    // Register each distinct character only once. Adding the same character twice
    // would raise a duplicate-key error if Transitions is a dictionary, so an input
    // such as "aab" must not reach Transitions.Add a second time for 'a'.
    var seen = new HashSet<char>();
    foreach (char ch in set)
    {
        if (seen.Add(ch))
        {
            result.Transitions.Add(ch, final);
        }
    }

    return result;
}
/// <summary>
/// Builds an FA that matches an exact sequence of characters
/// </summary>
/// <param name="string">The characters to match, in order</param>
/// <param name="accept">The symbol assigned to the state reached after the last character</param>
/// <returns>A new FA machine that will match this literal</returns>
public static FA Literal(IEnumerable<char> @string, string accept = "")
{
    var start = new FA();
    var tail = start;

    foreach (char ch in @string)
    {
        // Each new state is provisionally accepting; the state before it stops
        // accepting, so only the last state in the chain ends up with the symbol.
        var next = new FA { AcceptingSymbol = accept };
        tail.AcceptingSymbol = null;
        tail.Transitions.Add(ch, next);
        tail = next;
    }

    return start;
}
/// <summary>
/// Creates a new FA that matches any one of the FA expressions passed
/// </summary>
/// <param name="exprs">The expressions to match. Each one is cloned; the machines passed in are not modified.</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that will match the union of the FA expressions passed</returns>
public static FA Or(IEnumerable<FA> exprs, string accept = "")
{
    var result = new FA();
    var final = new FA();
    final.AcceptingSymbol = accept;

    foreach (var fa in exprs)
    {
        // Work on a private copy so the caller's machine keeps its own
        // transitions and accepting symbol.
        var nfa = fa.Clone();

        // Branch from the new start state into this alternative.
        result.EpsilonTransitions.Add(nfa);

        // Redirect the alternative's accepting state to the shared final state
        // and strip its own symbol, so only "final" reports acceptance.
        var nffa = nfa.FirstAcceptingState;
        nffa.EpsilonTransitions.Add(final);
        nffa.AcceptingSymbol = null;
    }

    return result;
}
/// <summary>
/// Builds the lesson 2 lexer, stores it in _lexer, and prints how many states it contains
/// </summary>
static void _RunLesson2()
{
    // The regular expression engine has no parser of its own, so each expression
    // is assembled by hand with the FA construction methods.
    //
    // The lexer is made of five expressions:
    //   - four literals, each named after itself: +, *, ( and )
    //   - one regex, [0-9]+, named "int"
    //
    // The symbol names must match the terminals of the CFG grammar from lesson 1.
    _lexer = new FA();
    foreach (var op in new[] { "+", "*", "(", ")" })
    {
        _lexer.EpsilonTransitions.Add(FA.Literal(op, op));
    }
    _lexer.EpsilonTransitions.Add(FA.Repeat(FA.Set("0123456789"), "int"));

    Console.WriteLine("Lesson 2 - FA Lexer");

    // The machine itself is not easy to display, so only the total state count is reported.
    var stateCount = _lexer.FillClosure().Count;
    Console.WriteLine("NFA machine containes {0} total states", stateCount);
    Console.WriteLine();
}
/// <summary>
/// Builds an FA that matches zero or more consecutive occurrences of the given FA expression
/// </summary>
/// <param name="expr">The expression to repeat</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>A new FA that matches the specified FA zero or more times</returns>
public static FA Kleene(FA expr, string accept = "")
{
    // "zero or more" is composed as optional("one or more"); the inner Repeat
    // uses its default accept symbol and Optional then applies the requested one.
    var oneOrMore = Repeat(expr);
    return Optional(oneOrMore, accept);
}
/// <summary>
/// Creates a tokenizer that pairs an FA lexer with the character input it will be run over
/// </summary>
/// <param name="lexer">The FA used to match tokens</param>
/// <param name="input">The characters to tokenize</param>
public Tokenizer(FA lexer, IEnumerable<char> input)
{
    // Nothing is evaluated here; both arguments are simply stored for later use.
    this._input = input;
    this._lexer = lexer;
}
/// <summary>
/// Runs the lexer over the input until no further match is possible and reports what was matched
/// </summary>
/// <returns>
/// The accepting symbol of the match, "#ERROR" when the consumed text is not accepted,
/// or "#EOS" once the end of the input has already been reached.
/// _state and _buffer are modified here, and the input is advanced through _MoveNextInput().
/// </returns>
string _Lex()
{
    var current = _initialStates;
    _buffer.Clear();

    // -2: the end of the stream was reached on a previous call.
    if (-2 == _state)
    {
        return "#EOS";
    }

    // -1: nothing has been read yet, so prime the input with its first character.
    if (-1 == _state)
    {
        if (!_MoveNextInput())
        {
            // Empty input: the result depends on whether the initial states accept.
            _state = -2;
            return _GetAcceptingSymbol(current) ?? "#ERROR";
        }
        _state = 0; // running
    }

    // Greedy matching: keep stepping the NFA with FillMove for as long as the
    // current character leads to at least one state.
    while (true)
    {
        var moved = FA.FillMove(current, _input.Current);
        if (0 == moved.Count)
        {
            // The current character cannot extend the match.
            break;
        }

        _buffer.Append(_input.Current);
        current = moved;

        if (!_MoveNextInput())
        {
            // Ran out of input in the middle of a match; report whatever the final states say.
            _state = -2;
            return _GetAcceptingSymbol(current) ?? "#ERROR";
        }
    }

    var symbol = _GetAcceptingSymbol(current);
    if (null != symbol)
    {
        return symbol;
    }

    // Error: include the offending character in the buffer and step past it
    // so that the next call starts on fresh input.
    _buffer.Append(_input.Current);
    if (!_MoveNextInput())
    {
        _state = -2;
    }
    return "#ERROR";
}