/// <summary>
/// Builds an <see cref="NFA"/> by evaluating the tokens yielded by
/// <c>yard.ShuntedTokens()</c> against a stack of partially built automata.
/// </summary>
/// <param name="yard">Token source; tokens are consumed in the order they are yielded.</param>
/// <returns>The single NFA left on the stack, after its states have been numbered.</returns>
/// <exception cref="LexerConstructionException">
/// Thrown for an unknown token type, when an operator finds fewer operands on the
/// stack than it consumes, or when the tokens do not reduce to exactly one NFA.
/// </exception>
public static NFA Create(ShuntingYard yard)
{
    var stack = new Stack<NFA>();

    foreach (var token in yard.ShuntedTokens())
    {
        // Number of NFAs this token pops from the stack. Determined up front so a
        // malformed expression is reported as a LexerConstructionException rather
        // than surfacing as the InvalidOperationException Stack.Pop() raises.
        int operandsNeeded;
        switch (token.Type)
        {
            case RegExToken.TokenType.Accept:
                operandsNeeded = 0;
                break;

            case RegExToken.TokenType.OperatorMul:
            case RegExToken.TokenType.OperatorQuestion:
            case RegExToken.TokenType.OperatorPlus:
            case RegExToken.TokenType.NumberedRepeat:
                operandsNeeded = 1;
                break;

            case RegExToken.TokenType.OperatorOr:
            case RegExToken.TokenType.OperatorConcat:
                operandsNeeded = 2;
                break;

            default:
                throw new LexerConstructionException("Unknown operator!");
        }

        if (stack.Count < operandsNeeded)
        {
            throw new LexerConstructionException("Malformed regexp expression");
        }

        switch (token.Type)
        {
            case RegExToken.TokenType.OperatorMul:
                stack.Push(RepeatZeroOrMore(stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorQuestion:
                stack.Push(RepeatZeroOrOnce(stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorOr:
                // Operands are handed over in pop order (top of the stack first).
                stack.Push(Or(stack.Pop(), stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorPlus:
                stack.Push(RepeatOnceOrMore(stack.Pop()));
                break;

            case RegExToken.TokenType.Accept:
                stack.Push(Accept(token.Characters));
                break;

            case RegExToken.TokenType.OperatorConcat:
                // & is not commutative, and the stack is reversed.
                var second = stack.Pop();
                var first = stack.Pop();
                stack.Push(And(first, second));
                break;

            case RegExToken.TokenType.NumberedRepeat:
                stack.Push(NumberedRepeat(stack.Pop(), token.MinRepetitions, token.MaxRepetitions));
                break;
        }
    }

    // We should end up with only ONE NFA on the stack or the expression
    // is malformed.
    if (stack.Count != 1)
    {
        throw new LexerConstructionException("Malformed regexp expression");
    }

    // Pop it from the stack, and assign each state a number, primarily for debugging purposes,
    // they don't _really_ need it. The state numbers actually used are the ones used in the DFA.
    var nfa = stack.Pop();
    nfa.AssignStateNumbers();
    return nfa;
}
/// <summary>
/// Builds an <see cref="NFA"/> by evaluating the tokens yielded by
/// <c>yard.ShuntedTokens()</c> against a stack of partially built automata.
/// </summary>
/// <param name="yard">Token source; tokens are consumed in the order they are yielded.</param>
/// <returns>The single NFA left on the stack, after its states have been numbered.</returns>
/// <exception cref="LexerConstructionException">
/// Thrown for an unknown token type, when an operator finds fewer operands on the
/// stack than it consumes, or when the tokens do not reduce to exactly one NFA.
/// </exception>
public static NFA Create(ShuntingYard yard)
{
    var stack = new Stack<NFA>();

    foreach (var token in yard.ShuntedTokens())
    {
        // Number of NFAs this token pops from the stack. Determined up front so a
        // malformed expression is reported as a LexerConstructionException rather
        // than surfacing as the InvalidOperationException Stack.Pop() raises.
        int operandsNeeded;
        switch (token.Type)
        {
            case RegExToken.TokenType.Accept:
                operandsNeeded = 0;
                break;

            case RegExToken.TokenType.OperatorMul:
            case RegExToken.TokenType.OperatorQuestion:
            case RegExToken.TokenType.OperatorPlus:
            case RegExToken.TokenType.NumberedRepeat:
                operandsNeeded = 1;
                break;

            case RegExToken.TokenType.OperatorOr:
            case RegExToken.TokenType.OperatorConcat:
                operandsNeeded = 2;
                break;

            default:
                throw new LexerConstructionException("Unknown operator!");
        }

        if (stack.Count < operandsNeeded)
        {
            throw new LexerConstructionException("Malformed regexp expression");
        }

        switch (token.Type)
        {
            case RegExToken.TokenType.OperatorMul:
                stack.Push(RepeatZeroOrMore(stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorQuestion:
                stack.Push(RepeatZeroOrOnce(stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorOr:
                // Operands are handed over in pop order (top of the stack first).
                stack.Push(Or(stack.Pop(), stack.Pop()));
                break;

            case RegExToken.TokenType.OperatorPlus:
                stack.Push(RepeatOnceOrMore(stack.Pop()));
                break;

            case RegExToken.TokenType.Accept:
                stack.Push(Accept(token.Characters));
                break;

            case RegExToken.TokenType.OperatorConcat:
                // & is not commutative, and the stack is reversed.
                var second = stack.Pop();
                var first = stack.Pop();
                stack.Push(And(first, second));
                break;

            case RegExToken.TokenType.NumberedRepeat:
                stack.Push(NumberedRepeat(stack.Pop(), token.MinRepetitions, token.MaxRepetitions));
                break;
        }
    }

    // We should end up with only ONE NFA on the stack or the expression
    // is malformed.
    if (stack.Count != 1)
    {
        throw new LexerConstructionException("Malformed regexp expression");
    }

    // Pop it from the stack, and assign each state a number, primarily for debugging purposes,
    // they don't _really_ need it. The state numbers actually used are the ones used in the DFA.
    var nfa = stack.Pop();
    nfa.AssignStateNumbers();
    return nfa;
}
/// <summary>
/// Builds an <see cref="NFA"/> by evaluating the tokens yielded by
/// <c>yard.ShuntedTokens(ignorecase)</c> against a stack of partially built automata.
/// </summary>
/// <param name="yard">Token source; tokens are consumed in the order they are yielded.</param>
/// <param name="ignorecase">Forwarded unchanged to <c>yard.ShuntedTokens</c>.</param>
/// <returns>The single NFA left on the stack, after its states have been numbered.</returns>
/// <exception cref="LexerConstructionException">
/// Thrown for an unknown token type, when popping an operand fails because the stack
/// is empty, or when the tokens do not reduce to exactly one NFA.
/// </exception>
public static NFA Create(ShuntingYard yard, bool ignorecase)
{
    Stack<NFA> stack = new Stack<NFA>();

    foreach (RegExToken token in yard.ShuntedTokens(ignorecase))
    {
        try
        {
            switch (token.Type)
            {
                case RegExToken.TokenType.OperatorMul:
                    stack.Push(RepeatZeroOrMore(stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorQuestion:
                    stack.Push(RepeatZeroOrOnce(stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorOr:
                    // Operands are handed over in pop order (top of the stack first).
                    stack.Push(Or(stack.Pop(), stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorPlus:
                    stack.Push(RepeatOnceOrMore(stack.Pop()));
                    break;

                case RegExToken.TokenType.Accept:
                    stack.Push(Accept(token.Characters));
                    break;

                case RegExToken.TokenType.OperatorConcat:
                    // & is not commutative, and the stack is reversed.
                    NFA second = stack.Pop();
                    NFA first = stack.Pop();
                    stack.Push(And(first, second));
                    break;

                case RegExToken.TokenType.NumberedRepeat:
                    stack.Push(NumberedRepeat(stack.Pop(), token.MinRepetitions, token.MaxRepetitions));
                    break;

                default:
                    throw new LexerConstructionException("Unknown operator!");
            }
        }
        catch (InvalidOperationException) when (stack.Count == 0) // stack popping failed
        {
            // Best effort: recover the original pattern text for the error message.
            // StringReader exposes no public accessor for its source string, so the
            // private "_s" field is read via reflection. That field is an implementation
            // detail: it may be absent on some runtimes, and the reader may not be a
            // StringReader at all. Neither case may be allowed to replace the
            // LexerConstructionException with a NullReferenceException/ArgumentException.
            string pattern = null;
            TextReader reader = yard.lexer.input;
            if (reader is StringReader)
            {
                FieldInfo field = typeof(StringReader).GetField("_s", BindingFlags.Instance | BindingFlags.NonPublic);
                if (field != null)
                {
                    pattern = field.GetValue(reader) as string;
                }
            }

            if (pattern == null)
            {
                throw new LexerConstructionException("Malformed regexp expression!");
            }

            throw new LexerConstructionException($"Malformed regexp expression: '{pattern}'");
        }
    }

    // We should end up with only ONE NFA on the stack or the expression is malformed.
    if (stack.Count != 1)
    {
        throw new LexerConstructionException("Malformed regexp expression!");
    }

    // Pop it from the stack, and assign each state a number, primarily for debugging purposes,
    // they don't _really_ need it. The state numbers actually used are the ones used in the DFA.
    NFA nfa = stack.Pop();
    nfa.AssignStateNumbers();
    return nfa;
}