/// <summary>
/// Wraps <paramref name="input"/> in a new NFA that has its own start state and its own
/// accept state, wired together with epsilon transitions so the input may be repeated.
/// </summary>
/// <param name="input">
/// The NFA to repeat. Its states and transitions are shared with the result (not copied),
/// and the accept flag of its first accept state is cleared.
/// </param>
/// <returns>A new NFA containing everything from <paramref name="input"/> plus two new states.</returns>
public static NFA RepeatZeroOrMore(NFA input)
{
    NFA result = new NFA();
    result.AddAll(input);

    // Fresh entry point for the repeated expression.
    NFA.State entry = new NFA.State();
    result.StartState = entry;
    result.States.Add(entry);

    NFA.State innerAccept = input.States.First(s => s.AcceptState);

    // Fresh exit point; the inner accept state hands its accepting role over to it.
    NFA.State exit = new NFA.State { AcceptState = true };
    result.States.Add(exit);
    innerAccept.AcceptState = false;

    // Epsilon wiring:
    //   entry       -> innerAccept  (new start is linked to the old accept state)
    //   innerAccept -> inner start  (allows another repetition)
    //   innerAccept -> exit         (leave the loop)
    result.Transitions.Add(new Transition<NFA.State>(entry, innerAccept));
    result.Transitions.Add(new Transition<NFA.State>(innerAccept, input.StartState));
    result.Transitions.Add(new Transition<NFA.State>(innerAccept, exit));

    return result;
}
/// <summary>
/// Creates a new NFA with one new state per state of this NFA and one new transition
/// per transition of this NFA, connected the same way.
/// </summary>
/// <returns>
/// The duplicated NFA. Each new state carries the AcceptState and StateNumber of the
/// state it was created from.
/// </returns>
protected internal NFA Copy()
{
    NFA clone = new NFA();

    // Maps every state of this NFA to its counterpart in the clone.
    Dictionary<State, State> twins = new Dictionary<State, State>();

    foreach (State original in States)
    {
        State twin = new State();
        twin.AcceptState = original.AcceptState;
        twin.StateNumber = original.StateNumber;

        twins.Add(original, twin);
        clone.States.Add(twin);
    }

    foreach (Transition<State> edge in Transitions)
    {
        State source = twins[edge.From];
        State target = twins[edge.To];

        // ValidInput is handed to the Transition constructor as-is; whether that
        // constructor duplicates it is not visible from here.
        clone.Transitions.Add(new Transition<State>(source, target, edge.ValidInput));
    }

    clone.StartState = twins[StartState];
    return clone;
}
/// <summary>
/// Makes <paramref name="nfa"/> optional by adding an epsilon transition from its start
/// state to its first accept state.
/// </summary>
/// <param name="nfa">The NFA to modify in place.</param>
/// <returns>The same <paramref name="nfa"/> instance.</returns>
private static NFA RepeatZeroOrOnce(NFA nfa)
{
    var transitions = nfa.Transitions;
    NFA.State entry = nfa.StartState;
    NFA.State accept = nfa.States.First(s => s.AcceptState);

    // Shortcut that lets the whole expression be skipped.
    transitions.Add(new Transition<NFA.State>(entry, accept));

    return nfa;
}
/// <summary>
/// Builds the alternation of <paramref name="a"/> and <paramref name="b"/>: a new start
/// state with epsilon transitions to both old start states, and a single new accept state
/// that every old accept state links to with an epsilon transition.
/// </summary>
/// <param name="a">First alternative. Its states are shared with the result and lose their accept flag.</param>
/// <param name="b">Second alternative. Its states are shared with the result and lose their accept flag.</param>
/// <returns>A new NFA containing all states and transitions of both inputs plus two new states.</returns>
public static NFA Or(NFA a, NFA b)
{
    NFA union = new NFA();

    // The composite contains every state and every edge of both operands.
    union.AddAll(a);
    union.AddAll(b);

    // New start state that branches into either operand.
    NFA.State entry = new NFA.State();
    union.StartState = entry;
    union.States.Add(entry);
    union.Transitions.Add(new Transition<NFA.State>(entry, a.StartState));
    union.Transitions.Add(new Transition<NFA.State>(entry, b.StartState));

    // New accept state. It is added to the state list only after the loop below, so the
    // loop never visits it.
    NFA.State exit = new NFA.State { AcceptState = true };

    foreach (NFA.State candidate in union.States)
    {
        if (!candidate.AcceptState)
        {
            continue;
        }

        // Demote the old accept state and forward it to the shared exit.
        candidate.AcceptState = false;
        union.Transitions.Add(new Transition<NFA.State>(candidate, exit));
    }

    union.States.Add(exit);
    return union;
}
/// <summary>
/// Appends every state and every transition of <paramref name="nfa"/> to this NFA.
/// The objects themselves are added, not copies of them.
/// </summary>
/// <param name="nfa">The NFA whose states and transitions are added.</param>
protected internal void AddAll(NFA nfa)
{
    var incomingStates = nfa.States;
    var incomingTransitions = nfa.Transitions;

    foreach (State incoming in incomingStates)
    {
        States.Add(incoming);
    }

    foreach (Transition<State> incoming in incomingTransitions)
    {
        Transitions.Add(incoming);
    }
}
/// <summary>
/// Builds an NFA that matches <paramref name="nfa"/> repeated at least
/// <paramref name="minRepetitions"/> and at most <paramref name="maxRepetitions"/> times,
/// by chaining copies of it with <c>And</c>.
/// </summary>
/// <param name="nfa">The expression to repeat. Only copies of it are used; it is not modified.</param>
/// <param name="minRepetitions">Minimum number of repetitions. Zero or less means the expression may be skipped.</param>
/// <param name="maxRepetitions">
/// Maximum number of repetitions. <c>int.MaxValue</c> means "no upper bound". A value below
/// <paramref name="minRepetitions"/> is raised to <paramref name="minRepetitions"/>.
/// </param>
/// <returns>A new NFA with exactly one accept state, and no transitions leaving that accept state.</returns>
private static NFA NumberedRepeat(NFA nfa, int minRepetitions, int maxRepetitions)
{
    // To create a suitable expression, the special case of infinite max repetitions
    // must be separately handled: only minRepetitions copies are chained, and the last
    // copy is allowed to loop.
    bool infiniteMax = maxRepetitions == int.MaxValue;
    if (infiniteMax || maxRepetitions < minRepetitions)
    {
        maxRepetitions = minRepetitions;
    }

    // Exactly zero repetitions: the only match is the empty input, so return a start
    // state connected to an accept state by a single epsilon transition.
    if (!infiniteMax && maxRepetitions <= 0)
    {
        NFA empty = new NFA();
        NFA.State emptyStart = new NFA.State();
        NFA.State emptyAccept = new NFA.State { AcceptState = true };
        empty.States.Add(emptyStart);
        empty.States.Add(emptyAccept);
        empty.Transitions.Add(new Transition<NFA.State>(emptyStart, emptyAccept));
        empty.StartState = emptyStart;
        return empty;
    }

    // Copy the NFA max repetitions times, link them together.
    NFA output = nfa.Copy();

    // Start state of the most recently chained copy. With a single copy this is the start
    // of the whole chain, which is what makes {0,} and {1,} work without a second copy.
    NFA.State lastCopyStart = output.StartState;

    // Start states of the copies that are optional (bounded case only).
    Stack<NFA.State> epsilonLinkStates = new Stack<NFA.State>();

    for (int i = 1; i < maxRepetitions; ++i)
    {
        NFA newNfa = nfa.Copy();
        lastCopyStart = newNfa.StartState;
        if (!infiniteMax && i >= minRepetitions)
        {
            epsilonLinkStates.Push(newNfa.StartState);
        }

        output = And(output, newNfa);
    }

    // Single forces an exception if the chain does not have exactly one accept state.
    NFA.State acceptState = output.States.Single(f => f.AcceptState);

    if (infiniteMax)
    {
        // Loop from the end of the last copy back to its start. The loop must not leave the
        // accept state itself: And() removes the accept state of its first operand and only
        // redirects edges going INTO it, so an edge leaving it would be lost as soon as this
        // NFA is concatenated with something. Introduce a fresh accept state instead.
        NFA.State newAcceptState = new NFA.State { AcceptState = true };
        acceptState.AcceptState = false;
        output.Transitions.Add(new Transition<NFA.State>(acceptState, lastCopyStart));
        output.Transitions.Add(new Transition<NFA.State>(acceptState, newAcceptState));
        output.States.Add(newAcceptState);
        acceptState = newAcceptState;
    }
    else
    {
        // Add epsilon transitions from the start of every optional copy to the accept state
        while (epsilonLinkStates.Count > 0)
        {
            output.Transitions.Add(new Transition<NFA.State>(epsilonLinkStates.Pop(), acceptState));
        }
    }

    if (minRepetitions <= 0)
    {
        // Zero repetitions are allowed: the first copy is optional as well.
        output.Transitions.Add(new Transition<NFA.State>(output.StartState, acceptState));
    }

    return output;
}
/// <summary>
/// Turns <paramref name="nfa"/> into a one-or-more repetition of itself, in place.
/// </summary>
/// <param name="nfa">
/// The NFA to modify. Its first accept state loses the accept flag and gains two outgoing
/// epsilon transitions: one back to the start state and one to a newly added accept state.
/// </param>
/// <returns>The same <paramref name="nfa"/> instance.</returns>
private static NFA RepeatOnceOrMore(NFA nfa)
{
    NFA.State loopEnd = nfa.States.First(s => s.AcceptState);

    // Going back to the start allows another repetition.
    Transition<NFA.State> loopBack = new Transition<NFA.State>(loopEnd, nfa.StartState);
    nfa.Transitions.Add(loopBack);

    // A separate accept state is used, since we cannot have edges exiting the accept state.
    NFA.State exit = new NFA.State { AcceptState = true };
    Transition<NFA.State> leave = new Transition<NFA.State>(loopEnd, exit);
    nfa.Transitions.Add(leave);
    nfa.States.Add(exit);

    // The old accept state is now an ordinary state.
    loopEnd.AcceptState = false;

    return nfa;
}
/// <summary>
/// Creates a two-state NFA: a start state with a single transition, valid for
/// <paramref name="acceptCharacters"/>, into an accept state.
/// </summary>
/// <param name="acceptCharacters">The input passed to the transition as its valid input.</param>
/// <returns>The newly created NFA.</returns>
private static NFA Accept(CharSet acceptCharacters)
{
    NFA result = new NFA();

    NFA.State entry = new NFA.State();
    NFA.State exit = new NFA.State { AcceptState = true };

    result.States.Add(entry);
    result.States.Add(exit);

    Transition<NFA.State> step = new Transition<NFA.State>(entry, exit, acceptCharacters);
    result.Transitions.Add(step);

    result.StartState = entry;
    return result;
}
/// <summary>
/// Combines several NFAs into one by adding a new start state with an epsilon transition
/// to the start state of every input NFA.
/// </summary>
/// <param name="nfas">The NFAs to combine. Their states and transitions are shared with the result.</param>
/// <returns>
/// A new NFA holding all states and transitions of the inputs plus the new start state.
/// Accept flags are not changed by this method.
/// </returns>
public static NFA Merge(IList<NFA> nfas)
{
    NFA merged = new NFA();

    // First pass: pull in every state and transition.
    for (int i = 0; i < nfas.Count; i++)
    {
        merged.AddAll(nfas[i]);
    }

    // Common entry point for all of the merged automata.
    State entry = new State();
    merged.States.Add(entry);
    merged.StartState = entry;

    // Second pass: epsilon transitions from the new start state to all the previous start states.
    for (int i = 0; i < nfas.Count; i++)
    {
        merged.Transitions.Add(new Transition<State>(entry, nfas[i].StartState));
    }

    return merged;
}
/// <summary>
/// Concatenates two NFAs: everything that led into an accept state of
/// <paramref name="first"/> is redirected to the start state of <paramref name="second"/>.
/// </summary>
/// <param name="first">
/// Left operand. It is modified: matching transitions are retargeted and its first accept
/// state is removed from its state list.
/// </param>
/// <param name="second">Right operand. Its accept state becomes the accept state of the result.</param>
/// <returns>
/// A new NFA starting at the start state of <paramref name="first"/> and containing the
/// (remaining) states and transitions of both operands.
/// </returns>
public static NFA And(NFA first, NFA second)
{
    // Retarget every edge of the first NFA that currently ends in an accept state.
    foreach (Transition<NFA.State> edge in first.Transitions)
    {
        if (edge.To.AcceptState)
        {
            edge.To = second.StartState;
        }
    }

    // The accept state of the first NFA is no longer needed.
    NFA.State obsoleteAccept = first.States.First(s => s.AcceptState);
    first.States.Remove(obsoleteAccept);

    // The second NFA already has an accept state, so no new one is created.
    NFA joined = new NFA();
    joined.StartState = first.StartState;
    joined.AddAll(first);
    joined.AddAll(second);
    return joined;
}
/// <summary>
/// Builds an NFA by evaluating the tokens yielded by <c>yard.ShuntedTokens</c> with a stack:
/// operand tokens push a new NFA, operator tokens pop their operands and push the result.
/// </summary>
/// <param name="yard">Provides the token sequence to evaluate.</param>
/// <param name="ignorecase">Passed through to <c>ShuntedTokens</c>.</param>
/// <returns>The single NFA left on the stack, after state numbers have been assigned.</returns>
/// <exception cref="LexerConstructionException">
/// Thrown for an unknown token type, when an operator runs out of operands, or when the
/// evaluation does not end with exactly one NFA on the stack.
/// </exception>
public static NFA Create(ShuntingYard yard, bool ignorecase)
{
    Stack<NFA> stack = new Stack<NFA>();

    foreach (RegExToken token in yard.ShuntedTokens(ignorecase))
    {
        try
        {
            switch (token.Type)
            {
                case RegExToken.TokenType.OperatorMul:
                    stack.Push(RepeatZeroOrMore(stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorQuestion:
                    stack.Push(RepeatZeroOrOnce(stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorOr:
                    stack.Push(Or(stack.Pop(), stack.Pop()));
                    break;

                case RegExToken.TokenType.OperatorPlus:
                    stack.Push(RepeatOnceOrMore(stack.Pop()));
                    break;

                case RegExToken.TokenType.Accept:
                    stack.Push(Accept(token.Characters));
                    break;

                case RegExToken.TokenType.OperatorConcat:
                    // & is not commutative, and the stack is reversed.
                    NFA second = stack.Pop();
                    NFA first = stack.Pop();
                    stack.Push(And(first, second));
                    break;

                case RegExToken.TokenType.NumberedRepeat:
                    stack.Push(NumberedRepeat(stack.Pop(), token.MinRepetitions, token.MaxRepetitions));
                    break;

                default:
                    throw new LexerConstructionException("Unknown operator!");
            }
        }
        catch (InvalidOperationException) when (stack.Count == 0) // stack popping failed
        {
            // Best effort only: the expression text is read from a private field of
            // StringReader. That field may not exist under this name on every runtime, and
            // the lexer input may not be a StringReader at all. In either case fall back to
            // the generic message instead of letting a NullReferenceException or
            // ArgumentException hide the real problem.
            FieldInfo field = typeof(StringReader).GetField("_s", BindingFlags.Instance | BindingFlags.NonPublic);
            StringReader reader = yard.lexer.input as StringReader;
            object expression = (field != null && reader != null) ? field.GetValue(reader) : null;

            if (expression == null)
            {
                throw new LexerConstructionException("Malformed regexp expression!");
            }

            throw new LexerConstructionException($"Malformed regexp expression: '{expression}'");
        }
    }

    // We should end up with only ONE NFA on the stack or the expression is malformed.
    if (stack.Count != 1)
    {
        throw new LexerConstructionException("Malformed regexp expression!");
    }

    // Pop it from the stack, and assign each state a number, primarily for debugging purposes,
    // they don't _really_ need it. The state numbers actually used are the ones used in the DFA.
    NFA nfa = stack.Pop();
    nfa.AssignStateNumbers();
    return nfa;
}
/// <summary>
/// Builds a DFA from <paramref name="nfa"/>. Every DFA state is created from a set of NFA
/// states (the union of the closures of the NFA states reachable on one input range), and
/// DFA states with equal NFA state sets are reused rather than duplicated.
/// </summary>
/// <param name="nfa">
/// The NFA to convert. <c>DistinguishValidInputs</c> is called on it, so it is modified.
/// </param>
/// <returns>The DFA, with its first created state as start state and state numbers assigned.</returns>
public static DFA Create(NFA nfa)
{
    var closures = nfa.GetAllClosures();

    // The valid input ranges that the NFA contains will need to be split up so that
    // the smallest possible units which NEVER overlap will be contained in each of the
    // states.
    nfa.DistinguishValidInputs();

    // The first DFA state is the closure set of the NFA start state.
    var dfa = new DFA();
    dfa.States.Add(new State(closures[nfa.StartState]));

    // Keep processing until no unmarked DFA state is left.
    for (var current = dfa.States.FirstOrDefault(s => !s.Mark);
         current != null;
         current = dfa.States.FirstOrDefault(s => !s.Mark))
    {
        current.Mark = true;

        // All NFA transitions that leave any of the NFA states making up this DFA state.
        var outgoing = nfa.Transitions.Where(tr => current.NfaStates.Contains(tr.From)).ToArray();

        // For every input range, the NFA states that can be reached with it.
        var destinationsByRange = new Dictionary<CharRange, List<NFA.State>>();
        foreach (var edge in outgoing)
        {
            foreach (var range in edge.ValidInput.Ranges)
            {
                List<NFA.State> targets;
                if (!destinationsByRange.TryGetValue(range, out targets))
                {
                    targets = new List<NFA.State>();
                    destinationsByRange.Add(range, targets);
                }

                targets.Add(edge.To);
            }
        }

        foreach (CharRange move in current.LegalMoves(outgoing))
        {
            var reached = destinationsByRange[move];
            if (!reached.Any())
            {
                continue;
            }

            // The closure of the move set: these NFA states form the destination DFA state.
            ISet<NFA.State> moveClosure = new HashSet<NFA.State>();
            foreach (var nfaState in reached)
            {
                moveClosure.UnionWith(closures[nfaState]);
            }

            // Reuse an existing DFA state with the same NFA state set if there is one, so
            // that the transition created below points at the already known object.
            var candidate = new State(moveClosure);
            var destination = dfa.States.FirstOrDefault(s => s.NfaStates.SetEquals(candidate.NfaStates));
            if (destination == null)
            {
                dfa.States.Add(candidate);
                destination = candidate;
            }

            // One transition per (from, to) pair: create it on first use, then extend its
            // valid input with the current range.
            var link = dfa.Transitions.SingleOrDefault(tr => tr.From == current && tr.To == destination);
            if (link == null)
            {
                link = new Transition<State>(current, destination);
                dfa.Transitions.Add(link);
            }

            link.ValidInput.AddRange(move.From, move.To, false);
        }
    }

    dfa.StartState = dfa.States[0];
    dfa.AssignStateNumbers();
    return dfa;
}