/// <summary>
/// Parses <paramref name="regex"/>, lowers it to a byte-level expression through a
/// <c>UnicodeUtf8Mapper</c>, builds the NFA and the DFA for it, and writes both diagrams
/// to the test output.
/// </summary>
/// <param name="regex">Regular expression source text handed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded unchanged to <c>ToInvariant</c>.</param>
public void CreateUtf8Diagram(string regex, bool caseSensitive)
{
    // Second argument is 0 here; NOTE(review): presumably the symbol id attached to the expression - confirm.
    var parsed = RegexParser.Parse(regex, 0);

    var byteExpression = parsed.ToInvariant(
        new UnicodeUtf8Mapper(false, false),
        new UnicodeCharSetProvider(),
        caseSensitive);

    // Both automata are built before anything is written, so a failure while building
    // the DFA produces no diagram output at all.
    var byteNfa = NfaBuilder<byte>.Build(byteExpression.Optimize());

    // NOTE(review): 0xFF looks like the end-of-input marker for the byte alphabet - confirm.
    var byteDfa = DfaBuilder<byte>.Build(byteNfa, 0xFF);

    NfaFactoryTest.WriteDiagram(this.output, byteNfa);
    WriteDiagram(this.output, byteDfa);
}
/// <summary>
/// Builds a codepoint DFA for the alternation of the regexes <c>"a"</c> (symbol 1) and
/// <c>"b"</c> (symbol 2), writes the NFA and DFA diagrams to the test output, and then
/// verifies that walking each input followed by <c>Codepoints.EOF</c> reaches the accept
/// transition from a state whose recorded symbol matches the alternative that was fed in.
/// </summary>
public void Actions()
{
    var symbolA = 1;
    var regexA = "a";
    var r1 = RegexParser.Parse(regexA, symbolA);

    var symbolB = 2;
    var regexB = "b";
    var r2 = RegexParser.Parse(regexB, symbolB);

    RegexExpression r = new RegexAlternation(r1, r2);
    var mapper = new UnicodeCodepointMapper(false, false);
    var rxCodepoint = r.ToInvariant(mapper, new UnicodeCharSetProvider(), true);

    var nfa = NfaBuilder<Codepoint>.Build(rxCodepoint.Optimize());
    NfaFactoryTest.WriteDiagram(this.output, nfa);

    var dfa = DfaBuilder<Codepoint>.Build(nfa, Codepoints.EOF);
    WriteDiagram(this.output, dfa);

    AssertAcceptedWithSymbol(regexA, symbolA);
    AssertAcceptedWithSymbol(regexB, symbolB);

    // Walks the DFA over `input` followed by EOF and checks the symbol recorded for the
    // state from which the accept transition is taken.
    void AssertAcceptedWithSymbol(string input, int expectedSymbol)
    {
        var state = dfa.StartState;
        var accepted = false;

        foreach (var codepoint in input.ToCodepoints().Append(Codepoints.EOF))
        {
            var nextStateId = state.GetTransition(codepoint);
            Assert.NotEqual(Dfa<Codepoint>.Reject, nextStateId);

            if (nextStateId == Dfa<Codepoint>.Accept)
            {
                // xUnit's argument order is (expected, actual).
                Assert.Equal(expectedSymbol, dfa.SymbolStates[state.Id]);
                accepted = true;
                break;
            }

            state = dfa.GetState(nextStateId);
            Assert.NotNull(state);
        }

        // Without this check the test would pass silently if the walk never reached Accept.
        Assert.True(accepted, $"Input \"{input}\" followed by EOF never reached the accept transition.");
    }
}