// Checks matching when one accepted word ("ab") is a prefix of another ("abc"):
// for the input "abc" the matcher must report the token class of the longer
// word and a token length of 3.
// NOTE(review): CODE_A / CODE_B / CODE_C are presumably the byte codes of
// 'a', 'b' and 'c' - confirm against their declarations.
public void SubWordMatchTest()
{
    var automaton = new FiniteAutomata("test", true);
    var afterA = new State("1");
    var afterAb = new State("2");
    var afterAbc = new State("3");

    // Chain: start --A--> 1 --B--> 2 --C--> 3
    automaton.StartState.AddTransitionTo(afterA, InputChar.For(CODE_A));
    afterA.AddTransitionTo(afterAb, InputChar.For(CODE_B));
    afterAb.AddTransitionTo(afterAbc, InputChar.For(CODE_C));

    // Both the end of "abc" (state 3) and the end of "ab" (state 2) accept,
    // each with its own token class.
    afterAbc.IsAccepting = true;
    afterAb.IsAccepting = true;
    afterAbc.TokenClass = 0x0ABC;
    afterAb.TokenClass = 0x00AB;

    transitionFunction.Init(automaton);

    byte[] text = Encoding.ASCII.GetBytes("abc");
    int matchedClass;
    int matchedLength = transitionFunction.MatchToken(text, 0, text.Length, out matchedClass);

    Assert.That(matchedClass, Is.EqualTo(0x0ABC));
    Assert.That(matchedLength, Is.EqualTo(3));
}
// Verifies NFA-to-DFA conversion when an epsilon edge points backwards:
//   S --1--> A --2--> B --3--> T, plus an epsilon edge from B back to A.
public void BackEpsilonTransitionTest()
{
    var loopTarget = new State("A");
    var loopSource = new State("B");
    var source = new FiniteAutomata("nfa");

    source.StartState.AddTransitionTo(loopTarget, InputChar.For(0x01));
    loopTarget.AddTransitionTo(loopSource, InputChar.For(0x02));
    loopSource.AddTransitionTo(source.Terminator, InputChar.For(0x03));
    loopSource.AddTransitionTo(loopTarget, InputChar.Epsilon());
    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    ValidateTerminator(converted);

    var start = converted.StartState;
    var terminator = converted.Terminator;

    // One or more repetitions of symbol 2 followed by 3 end in the terminator.
    Assert.That(start.Simulate(0, 1, 2, 3), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 1, 2, 2, 3), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 1, 2, 2, 2, 3), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 1, 2, 2, 2, 2, 3), Is.SameAs(terminator));

    // An unknown symbol, or input continuing past the terminator, must fail.
    Assert.Throws<SimulationException>(() => start.Simulate(0, 1, 2, 4));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 1, 2, 3, 2));

    // Input that stops inside the loop does not end in the terminator.
    Assert.That(start.Simulate(0, 1, 2, 2), Is.Not.SameAs(terminator));
}
// Crosses an "any byte, repeated" automaton with a "*/" suffix automaton and
// checks that simulation accepts text ending at the first "*/" and fails on
// any input that continues after it.
public void CppCommentMatchTest()
{
    // Single accepting state that loops to itself on every byte value.
    var anyByte = new FiniteAutomata("any-char", false);
    var loopState = new State("start") {Id = 0};
    anyByte.Terminator = loopState;
    anyByte.StartState = loopState;
    anyByte.StartState.IsAccepting = true;

    int code = Byte.MinValue;
    while (code <= Byte.MaxValue)
    {
        anyByte.StartState.AddTransitionTo(anyByte.StartState, InputChar.For((byte) code));
        code++;
    }

    // Suffix: '*' followed by '/'.
    var closing = new FiniteAutomata("suffix", true);
    var sawAsterisk = new State("asterisk") {Id = 1};
    closing.StartState.Id = 0;
    closing.StartState.AddTransitionTo(sawAsterisk, InputChar.For((byte) '*'));
    sawAsterisk.AddTransitionTo(closing.Terminator, InputChar.For((byte) '/'));
    closing.Terminator.IsAccepting = true;
    closing.Terminator.Id = 2;

    var cross = new CrossAutomata(anyByte, closing);

    var reached = cross.StartState.Simulate("abcdefgh*/");
    Assert.That(reached.IsAccepting);

    // Nothing may follow the first occurrence of the suffix.
    Assert.Throws<SimulationException>(() => cross.StartState.Simulate("abcde*/abcde"));
    Assert.Throws<SimulationException>(() => cross.StartState.Simulate("abcde*/abcde*/"));
}
/// <summary>
/// Creates an automaton with the given name, optionally pre-populated with
/// its start and terminator states.
/// </summary>
/// <param name="name">Automaton name; also used as the prefix of the generated state names.</param>
/// <param name="createMarginalStates">
/// When <c>true</c>, <c>StartState</c> and <c>Terminator</c> are created here and named
/// "<paramref name="name"/>::StartState" and "<paramref name="name"/>::Terminator";
/// when <c>false</c>, this constructor does not assign them.
/// </param>
internal FiniteAutomata(string name, bool createMarginalStates)
{
    if (createMarginalStates)
    {
        StartState = new State(string.Concat(name, "::StartState"));
        Terminator = new State(string.Concat(name, "::Terminator"));
    }

    Name = name;
}
/// <summary>
/// Registers a transition from this state to <paramref name="state"/> on the
/// input char <paramref name="iChar"/>. Several target states may be registered
/// for the same input char; they are kept in the order they were added.
/// </summary>
/// <param name="state">Target state for the transition; a <c>null</c> target is silently ignored.</param>
/// <param name="iChar">Input char that triggers the transition.</param>
internal void AddTransitionTo(State state, InputChar iChar)
{
    if (state == null)
    {
        return;
    }

    // Fetch the target list for this input char, creating it on first use.
    List<State> targets;
    if (!Transitions.TryGetValue(iChar, out targets))
    {
        targets = new List<State>();
        Transitions[iChar] = targets;
    }

    targets.Add(state);
}
// Runs the NFA-to-DFA converter over an automaton without epsilon edges:
//   start --1--> 1, start --2--> 2, start --3|4--> 3
//   states 1, 2, 3 --5--> 4 and --6--> 5
//   states 4, 5 --7--> terminator
// and checks that the result has 7 states and accepts exactly these paths.
public void DFAConversionTest()
{
    var source = new FiniteAutomata("dfa");
    var first = new State("1");
    var second = new State("2");
    var third = new State("3");
    var fourth = new State("4");
    var fifth = new State("5");

    source.StartState.AddTransitionTo(first, InputChar.For(0x01));
    source.StartState.AddTransitionTo(second, InputChar.For(0x02));
    source.StartState.AddTransitionTo(third, InputChar.For(0x03));
    source.StartState.AddTransitionTo(third, InputChar.For(0x04));

    first.AddTransitionTo(fourth, InputChar.For(0x05));
    second.AddTransitionTo(fourth, InputChar.For(0x05));
    third.AddTransitionTo(fourth, InputChar.For(0x05));

    first.AddTransitionTo(fifth, InputChar.For(0x06));
    second.AddTransitionTo(fifth, InputChar.For(0x06));
    third.AddTransitionTo(fifth, InputChar.For(0x06));

    fourth.AddTransitionTo(source.Terminator, InputChar.For(0x07));
    fifth.AddTransitionTo(source.Terminator, InputChar.For(0x07));
    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    ValidateTerminator(converted);
    Assert.That(converted.GetStates().Count, Is.EqualTo(7));

    var start = converted.StartState;
    var terminator = converted.Terminator;

    // Every first symbol (1-4) combined with middle symbol 5 ...
    Assert.That(start.Simulate(0, 1, 5, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 2, 5, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 3, 5, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 4, 5, 7), Is.SameAs(terminator));

    // ... and with middle symbol 6 reaches the terminator.
    Assert.That(start.Simulate(0, 1, 6, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 2, 6, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 3, 6, 7), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 4, 6, 7), Is.SameAs(terminator));

    // A wrong final symbol has no transition and must fail.
    Assert.Throws<SimulationException>(() => start.Simulate(0, 1, 5, 6));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 2, 5, 8));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 3, 5, 6));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 4, 6, 6));
}
// Exercises the lazy-quantifier cross automaton for the regexp (aa|bb)*?aab,
// where the repeated part and the suffix share a common prefix ("aa").
public void AmbiguityQuantifierTest()
{
    // Repeated part: (aa|bb)
    var repeated = new FiniteAutomata("repeated", true);
    var sawA = new State("A") { Id = 1 };
    var sawB = new State("B") { Id = 2 };
    repeated.StartState.Id = 0;
    repeated.Terminator.Id = 4;
    repeated.StartState.AddTransitionTo(sawA, InputChar.For((byte)'a'));
    repeated.StartState.AddTransitionTo(sawB, InputChar.For((byte)'b'));
    sawA.AddTransitionTo(repeated.Terminator, InputChar.For((byte)'a'));
    sawB.AddTransitionTo(repeated.Terminator, InputChar.For((byte)'b'));
    repeated.Terminator.IsAccepting = true;

    // Suffix: aab
    var suffix = new FiniteAutomata("suffix", true);
    var afterFirstA = new State("mid1") { Id = 1 };
    var afterSecondA = new State("mid2") { Id = 2 };
    suffix.StartState.Id = 0;
    suffix.Terminator.Id = 3;
    suffix.StartState.AddTransitionTo(afterFirstA, InputChar.For((byte)'a'));
    afterFirstA.AddTransitionTo(afterSecondA, InputChar.For((byte)'a'));
    afterSecondA.AddTransitionTo(suffix.Terminator, InputChar.For((byte)'b'));
    suffix.Terminator.IsAccepting = true;

    var cross = new CrossAutomata(repeated, suffix);

    var reached = cross.StartState.Simulate("aab");
    Assert.That(reached.IsAccepting);

    reached = cross.StartState.Simulate("bbaab");
    Assert.That(reached.IsAccepting);

    Assert.Throws<SimulationException>(() => cross.StartState.Simulate("aabbaab"));

    // Assert.Pass ends the test here as successful, so the assertions below
    // are intentionally not executed yet.
    Assert.Pass("Note: Some asserts were ignored");

    // In order to make these tests pass, a more intelligent lazy quantifier
    // mechanism should be developed. When there is a transition in the repeated
    // automaton and no transition in the suffix automaton, you can't simply go
    // back to the beginning of the suffix automaton, since some input may
    // already have been consumed for it. Hence what is needed is concurrent
    // simulation of many suffix automata. This point is still to be solved.
    // TODO: Solve it
    reached = cross.StartState.Simulate("aaaab");
    Assert.That(reached.IsAccepting);

    reached = cross.StartState.Simulate("bbaaaab");
    Assert.That(reached.IsAccepting);
}
// Checks token prioritization when two token automata both match the same
// input: "abc" is matched by the generic [a-z]* automaton (token class 1) and
// by the literal "abc" automaton (token class 2). After the two are merged
// and converted to a DFA, the state reached on "abc" must be accepting and
// carry token class 2.
public void TokenPrioritizationTest()
{
    // First automaton, [a-z]*:
    //   S --e--> 1 --[a-z]--> 2 --e--> T
    //   with an epsilon edge from 2 back to 1 (repeat)
    //   and an epsilon edge from S to T (empty match).
    var state1 = new State("1");
    var state2 = new State("2");
    var nfa1 = new FiniteAutomata("nfa");
    nfa1.StartState.AddTransitionTo(state1, InputChar.Epsilon());
    for (char c = 'a'; c <= 'z'; c++)
    {
        state1.AddTransitionTo(state2, InputChar.For((byte)c));
    }
    state2.AddTransitionTo(nfa1.Terminator, InputChar.Epsilon());
    state2.AddTransitionTo(state1, InputChar.Epsilon());
    nfa1.StartState.AddTransitionTo(nfa1.Terminator, InputChar.Epsilon());
    nfa1.Terminator.IsAccepting = true;
    nfa1.Terminator.TokenClass = 1; // least priority, if defined early gets small token class

    // Second automaton, the literal "abc":
    //   S --a--> 3 --b--> 4 --c--> T
    // It must be wired onto nfa2 itself. Attaching these transitions to nfa1
    // would leave nfa2 empty and overwrite nfa1's token class, so the test
    // would pass without exercising prioritization at all.
    var state3 = new State("3");
    var state4 = new State("4");
    var nfa2 = new FiniteAutomata("nfa");
    nfa2.StartState.AddTransitionTo(state3, InputChar.For((byte)'a'));
    state3.AddTransitionTo(state4, InputChar.For((byte)'b'));
    state4.AddTransitionTo(nfa2.Terminator, InputChar.For((byte)'c'));
    nfa2.Terminator.IsAccepting = true;
    nfa2.Terminator.TokenClass = 2; // most priority

    // Common automaton: epsilon edges from a fresh start state into both.
    var nfa = new FiniteAutomata("CommonAutomata");
    nfa.StartState.AddTransitionTo(nfa1.StartState, InputChar.Epsilon());
    nfa.StartState.AddTransitionTo(nfa2.StartState, InputChar.Epsilon());

    var dfa = NFAToDFAConverter.Convert(nfa);

    Assert.That(dfa.StartState.Simulate("abc").IsAccepting);
    Assert.That(dfa.StartState.Simulate("abc").TokenClass, Is.EqualTo(2));
}
// Converts an NFA for "1 2* 1" (a Kleene closure with a prefix and a suffix):
//   S --1--> 0 --e--> 1 --e--> 2 --2--> 3 --e--> 4 --e--> 5 --1--> T
//   with an epsilon edge from 3 back to 2 (repeat)
//   and an epsilon edge from 1 to 4 (skip the repeated part).
public void PrefixedSuffixedKleeneClosureTest()
{
    var s0 = new State("0");
    var s1 = new State("1");
    var s2 = new State("2");
    var s3 = new State("3");
    var s4 = new State("4");
    var s5 = new State("5");
    var source = new FiniteAutomata("nfa");

    // Main chain.
    source.StartState.AddTransitionTo(s0, InputChar.For(1));
    s0.AddTransitionTo(s1, InputChar.Epsilon());
    s1.AddTransitionTo(s2, InputChar.Epsilon());
    s2.AddTransitionTo(s3, InputChar.For(2));
    s3.AddTransitionTo(s4, InputChar.Epsilon());
    s4.AddTransitionTo(s5, InputChar.Epsilon());
    s5.AddTransitionTo(source.Terminator, InputChar.For(1));

    // Skip edge and repeat edge.
    s1.AddTransitionTo(s4, InputChar.Epsilon());
    s3.AddTransitionTo(s2, InputChar.Epsilon());

    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    var start = converted.StartState;

    // Zero to four repetitions of symbol 2 between the two 1s.
    Assert.That(start.Simulate(0, 1, 1).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 1).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2, 1).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2, 2, 1).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2, 2, 2, 1).IsAccepting);
}
// Converts an NFA for "1 2*" (a Kleene closure preceded by a prefix):
//   S --1--> 0 --e--> A --2--> B --e--> T
//   with an epsilon edge from B back to A (repeat)
//   and an epsilon edge from 0 to T (zero repetitions).
public void PrefixedKleeneClosureTest()
{
    var afterPrefix = new State("0");
    var loopEntry = new State("A");
    var loopExit = new State("B");
    var source = new FiniteAutomata("nfa");

    source.StartState.AddTransitionTo(afterPrefix, InputChar.For(1));
    afterPrefix.AddTransitionTo(loopEntry, InputChar.Epsilon());
    loopEntry.AddTransitionTo(loopExit, InputChar.For(2));
    loopExit.AddTransitionTo(source.Terminator, InputChar.Epsilon());
    loopExit.AddTransitionTo(loopEntry, InputChar.Epsilon());
    afterPrefix.AddTransitionTo(source.Terminator, InputChar.Epsilon());
    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    var start = converted.StartState;

    // The prefix alone, then the prefix followed by one to four 2s.
    Assert.That(start.Simulate(0, 1).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2, 2).IsAccepting);
    Assert.That(start.Simulate(0, 1, 2, 2, 2, 2).IsAccepting);
}
// Converts an NFA for "2*" (a bare Kleene closure):
//   S --e--> A --2--> B --e--> T
//   with an epsilon edge from B back to A (repeat)
//   and an epsilon edge from S to T (empty input).
public void KleeneClosureTest()
{
    var loopEntry = new State("A");
    var loopExit = new State("B");
    var source = new FiniteAutomata("nfa");

    source.StartState.AddTransitionTo(loopEntry, InputChar.Epsilon());
    loopEntry.AddTransitionTo(loopExit, InputChar.For(2));
    loopExit.AddTransitionTo(source.Terminator, InputChar.Epsilon());
    loopExit.AddTransitionTo(loopEntry, InputChar.Epsilon());
    source.StartState.AddTransitionTo(source.Terminator, InputChar.Epsilon());
    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    var start = converted.StartState;

    // Empty input, then one to four repetitions of symbol 2.
    Assert.That(start.Simulate().IsAccepting);
    Assert.That(start.Simulate(0, 2).IsAccepting);
    Assert.That(start.Simulate(0, 2, 2).IsAccepting);
    Assert.That(start.Simulate(0, 2, 2, 2).IsAccepting);
    Assert.That(start.Simulate(0, 2, 2, 2, 2).IsAccepting);

    // NOTE(review): the converted DFA reportedly consists of two accepting
    // states - the start state S with a transition on 2 to a state T that
    // loops to itself on 2. Generally speaking it should be a single
    // accepting state with a self transition on 2.
}
// Converts an NFA with a forward epsilon branch and checks the result:
//   S --1--> A --2--> B --4--> T
//   A --e--> C --3--> D --4--> T
// The converted automaton must have 5 states and accept "1 2 4" and "1 3 4".
public void EpsilonTransitionsConversionTest()
{
    var branchPoint = new State("A");
    var upperMid = new State("B");
    var lowerEntry = new State("C");
    var lowerMid = new State("D");
    var source = new FiniteAutomata();

    source.StartState.AddTransitionTo(branchPoint, InputChar.For(1));
    branchPoint.AddTransitionTo(upperMid, InputChar.For(2));
    branchPoint.AddTransitionTo(lowerEntry, InputChar.Epsilon());
    lowerEntry.AddTransitionTo(lowerMid, InputChar.For(3));
    upperMid.AddTransitionTo(source.Terminator, InputChar.For(4));
    lowerMid.AddTransitionTo(source.Terminator, InputChar.For(4));
    source.Terminator.IsAccepting = true;

    var converted = NFAToDFAConverter.Convert(source);
    ValidateTerminator(converted);
    Assert.That(converted.GetStates().Count, Is.EqualTo(5));

    var start = converted.StartState;
    var terminator = converted.Terminator;

    // Both branches end in the terminator.
    Assert.That(start.Simulate(0, 1, 2, 4), Is.SameAs(terminator));
    Assert.That(start.Simulate(0, 1, 3, 4), Is.SameAs(terminator));

    // Wrong first symbol, wrong last symbol, and a symbol from the other branch.
    Assert.Throws<SimulationException>(() => start.Simulate(0, 2, 2, 4));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 1, 2, 5));
    Assert.Throws<SimulationException>(() => start.Simulate(0, 1, 3, 2));
}
// Exercises the lazy-quantifier cross automaton for (aa|bb|cc)*?ab:
// complete repetitions return to the start state, the suffix "ab" leads to
// an accepting state, and no input may follow the suffix.
public void GeneralLazyQuantifierTest()
{
    // Repeated part: (aa|bb|cc)
    var repeated = new FiniteAutomata("repeated", true);
    var sawA = new State("A") { Id = 1 };
    var sawB = new State("B") { Id = 2 };
    var sawC = new State("C") { Id = 3 };
    repeated.StartState.Id = 0;
    repeated.Terminator.Id = 4;
    repeated.StartState.AddTransitionTo(sawA, InputChar.For((byte)'a'));
    repeated.StartState.AddTransitionTo(sawB, InputChar.For((byte)'b'));
    repeated.StartState.AddTransitionTo(sawC, InputChar.For((byte)'c'));
    sawA.AddTransitionTo(repeated.Terminator, InputChar.For((byte)'a'));
    sawB.AddTransitionTo(repeated.Terminator, InputChar.For((byte)'b'));
    sawC.AddTransitionTo(repeated.Terminator, InputChar.For((byte)'c'));
    repeated.Terminator.IsAccepting = true;

    // Suffix: ab
    var suffix = new FiniteAutomata("suffix", true);
    var afterA = new State("mid") { Id = 1 };
    suffix.StartState.Id = 0;
    suffix.Terminator.Id = 2;
    suffix.StartState.AddTransitionTo(afterA, InputChar.For((byte) 'a'));
    afterA.AddTransitionTo(suffix.Terminator, InputChar.For((byte) 'b'));
    suffix.Terminator.IsAccepting = true;

    // (aa|bb|cc)*?ab
    var cross = new CrossAutomata(repeated, suffix);

    // Whole repetitions without the suffix bring the simulation back to start.
    string[] repetitionsOnly = { "aa", "bb", "cc", "aabbccbbaa" };
    foreach (string text in repetitionsOnly)
    {
        var backAtStart = cross.StartState.Simulate(text);
        Assert.That(backAtStart, Is.SameAs(cross.StartState));
    }

    // The suffix, with or without preceding repetitions, is accepted.
    var reached = cross.StartState.Simulate("ccaabbccaabbab");
    Assert.That(reached.IsAccepting);

    reached = cross.StartState.Simulate("ab");
    Assert.That(reached.IsAccepting);

    // A half-consumed suffix is not accepting.
    reached = cross.StartState.Simulate("ccaabbccaabba");
    Assert.That(!reached.IsAccepting);

    // Nothing may follow the suffix.
    Assert.Throws<SimulationException>(() => cross.StartState.Simulate("ccaabbccaabbabab"));
}
// Test fixture setup: creates six named states (A-F), a "Test" automaton with
// explicit start and terminator states, and the set of all eight states that
// the tests expect to find.
public void InitTests()
{
    stateA = new State("A");
    stateB = new State("B");
    stateC = new State("C");
    stateD = new State("D");
    stateE = new State("E");
    stateF = new State("F");

    // Configure the automaton fully before publishing it to the field.
    var automata = new FiniteAutomata("Test");
    automata.StartState = new State("start");
    automata.Terminator = new State("terminator");
    mFiniteAutomata = automata;

    var allStates = new HashSet<State>();
    allStates.Add(mFiniteAutomata.StartState);
    allStates.Add(stateA);
    allStates.Add(stateB);
    allStates.Add(stateC);
    allStates.Add(stateD);
    allStates.Add(stateE);
    allStates.Add(stateF);
    allStates.Add(mFiniteAutomata.Terminator);
    expectedSet = allStates;
}
// Runs the simulation from the given state over plain bytes: widens each
// byte to a nullable byte and calls state.Simulate with a first argument of 0.
// Returns whatever state.Simulate returns.
private static State SimulateNFA(State state, params byte[] inputChars)
{
    var widened = new byte?[inputChars.Length];

    int position = 0;
    foreach (byte value in inputChars)
    {
        widened[position] = value;
        position++;
    }

    return state.Simulate(0, widened);
}