/// <summary>
/// Compiles <paramref name="regex"/> into a codepoint DFA, feeds it the codepoints of
/// <paramref name="input"/> followed by <c>Codepoints.EOF</c>, and asserts that the
/// outcome (Accept or Reject) agrees with <paramref name="match"/>.
/// </summary>
/// <param name="regex">Regular expression source handed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>ToInvariant</c> when lowering the parsed expression.</param>
/// <param name="input">Text whose codepoints are run through the DFA.</param>
/// <param name="match"><c>true</c> if the DFA is expected to reach Accept, <c>false</c> if it is expected to reach Reject.</param>
public void MatchTest(string regex, bool caseSensitive, string input, bool match)
{
    var expression = RegexParser.Parse(regex, 0);
    var mapper = new UnicodeCodepointMapper(false, false);
    var rxCodepoint = expression.ToInvariant(mapper, new UnicodeCharSetProvider(), caseSensitive);
    var nfa = NfaBuilder<Codepoint>.Build(rxCodepoint.Optimize());
    var dfa = DfaBuilder<Codepoint>.Build(nfa, Codepoints.EOF);

    var state = dfa.StartState;

    // Tracks whether the walk ended in Accept or Reject; without it the test
    // would pass silently when the loop runs out of input without a verdict.
    var decided = false;

    foreach (var codepoint in input.ToCodepoints().Append(Codepoints.EOF))
    {
        var nextStateId = state.GetTransition(codepoint);

        if (nextStateId == Dfa<Codepoint>.Reject)
        {
            decided = true;
            Assert.False(match, "Regex has not matched input, but expected a match");
            break;
        }

        if (nextStateId == Dfa<Codepoint>.Accept)
        {
            decided = true;
            Assert.True(match, "Regex has matched input, but expected a non-match");
            break;
        }

        state = dfa.GetState(nextStateId);
        Assert.NotNull(state);
    }

    Assert.True(decided, "DFA reached neither Accept nor Reject after consuming the input and EOF");
}
/// <summary>
/// Compiles <paramref name="regex"/> into a byte-level DFA, feeds it the bytes of
/// <paramref name="input"/> followed by <c>Utf8Bytes.EOF</c>, and asserts that the
/// outcome (Accept or Reject) agrees with <paramref name="match"/>.
/// </summary>
/// <param name="regex">Regular expression source handed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>ToInvariant</c> when lowering the parsed expression.</param>
/// <param name="input">Raw bytes run through the DFA; also decoded as UTF-8 for the diagnostic output.</param>
/// <param name="match"><c>true</c> if the DFA is expected to reach Accept, <c>false</c> if it is expected to reach Reject.</param>
public void MatchTestUtf8(string regex, bool caseSensitive, byte[] input, bool match)
{
    // Diagnostic output: the decoded text and the hex value of each char in it.
    var inputString = Encoding.UTF8.GetString(input);
    this.output.WriteLine(inputString);
    this.output.WriteLine(string.Join(" ", inputString.Select(c => ((int)c).ToString("x4"))));

    var expression = RegexParser.Parse(regex, 0);
    var mapper = new UnicodeUtf8Mapper(false, false);
    var rxCodepoint = expression.ToInvariant(mapper, new UnicodeCharSetProvider(), caseSensitive);
    var nfa = NfaBuilder<byte>.Build(rxCodepoint.Optimize());
    var dfa = DfaBuilder<byte>.Build(nfa, Utf8Bytes.EOF);

    var state = dfa.StartState;

    // Tracks whether the walk ended in Accept or Reject; without it the test
    // would pass silently when the loop runs out of input without a verdict.
    var decided = false;

    foreach (var inputByte in input.Append(Utf8Bytes.EOF))
    {
        var nextStateId = state.GetTransition(inputByte);

        if (nextStateId == Dfa<byte>.Reject)
        {
            decided = true;
            Assert.False(match, "Regex has not matched input, but expected a match");
            break;
        }

        if (nextStateId == Dfa<byte>.Accept)
        {
            decided = true;
            Assert.True(match, "Regex has matched input, but expected a non-match");
            break;
        }

        state = dfa.GetState(nextStateId);
        Assert.NotNull(state);
    }

    Assert.True(decided, "DFA reached neither Accept nor Reject after consuming the input and EOF");
}
/// <summary>
/// Parses <paramref name="regex"/>, lowers it to a codepoint expression, wraps it in an
/// <c>RxAccept</c> node and writes a diagram of the resulting NFA to the test output.
/// </summary>
/// <param name="regex">Regular expression source handed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>ToInvariant</c> when lowering the parsed expression.</param>
public void CreateDiagram(string regex, bool caseSensitive)
{
    var parsed = RegexParser.Parse(regex, 0);
    var codepointMapper = new UnicodeCodepointMapper(false, false);
    var charSets = new UnicodeCharSetProvider();
    var lowered = parsed.ToInvariant(codepointMapper, charSets, caseSensitive);

    // The accept node is built with the default symbol id and a null third argument,
    // and the expression is passed to the NFA builder without calling Optimize().
    var acceptNode = new RxAccept<Codepoint>(lowered, default(SymbolId), null);
    var automaton = NfaBuilder<Codepoint>.Build(acceptNode);

    WriteDiagram(this.output, automaton);
}
/// <summary>
/// Parses <paramref name="regex"/>, lowers it to a byte-level expression, builds both the
/// NFA and the DFA for it, and writes a diagram of each to the test output.
/// </summary>
/// <param name="regex">Regular expression source handed to <c>RegexParser.Parse</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>ToInvariant</c> when lowering the parsed expression.</param>
public void CreateUtf8Diagram(string regex, bool caseSensitive)
{
    var expression = RegexParser.Parse(regex, 0);
    var mapper = new UnicodeUtf8Mapper(false, false);
    var rxCodepoint = expression.ToInvariant(mapper, new UnicodeCharSetProvider(), caseSensitive);
    var nfa = NfaBuilder<byte>.Build(rxCodepoint.Optimize());

    // Use the shared end-of-input marker instead of a hard-coded 0xFF so the diagram is
    // built with the same marker MatchTestUtf8 passes to the DFA builder.
    // NOTE(review): assumes Utf8Bytes.EOF has the value 0xFF — confirm.
    var dfa = DfaBuilder<byte>.Build(nfa, Utf8Bytes.EOF);

    NfaFactoryTest.WriteDiagram(this.output, nfa);
    WriteDiagram(this.output, dfa);
}
/// <summary>
/// Builds a DFA for the alternation of two single-character regexes that were parsed with
/// different symbol ids, writes the NFA and DFA diagrams to the test output, and checks
/// that running each regex's own text through the DFA reaches Accept from a state whose
/// entry in <c>dfa.SymbolStates</c> equals the matching symbol id.
/// </summary>
public void Actions()
{
    var symbolA = 1;
    var regexA = "a";
    var r1 = RegexParser.Parse(regexA, symbolA);

    var symbolB = 2;
    var regexB = "b";
    var r2 = RegexParser.Parse(regexB, symbolB);

    RegexExpression r = new RegexAlternation(r1, r2);

    var mapper = new UnicodeCodepointMapper(false, false);
    var rxCodepoint = r.ToInvariant(mapper, new UnicodeCharSetProvider(), true);

    var nfa = NfaBuilder<Codepoint>.Build(rxCodepoint.Optimize());
    NfaFactoryTest.WriteDiagram(this.output, nfa);

    var dfa = DfaBuilder<Codepoint>.Build(nfa, Codepoints.EOF);
    WriteDiagram(this.output, dfa);

    // First alternative: "a" must be accepted with symbolA.
    var state = dfa.StartState;
    var accepted = false;
    foreach (var codepoint in regexA.ToCodepoints().Append(Codepoints.EOF))
    {
        var nextStateId = state.GetTransition(codepoint);
        Assert.NotEqual(Dfa<Codepoint>.Reject, nextStateId);

        if (nextStateId == Dfa<Codepoint>.Accept)
        {
            // xUnit's Assert.Equal takes (expected, actual); the symbol is looked up
            // on the state that issued the Accept transition.
            Assert.Equal(symbolA, dfa.SymbolStates[state.Id]);
            accepted = true;
            break;
        }

        state = dfa.GetState(nextStateId);
        Assert.NotNull(state);
    }

    // Without this check the symbol assertion above could be skipped silently.
    Assert.True(accepted, "DFA did not reach Accept for the first alternative");

    // Second alternative: "b" must be accepted with symbolB.
    state = dfa.StartState;
    accepted = false;
    foreach (var codepoint in regexB.ToCodepoints().Append(Codepoints.EOF))
    {
        var nextStateId = state.GetTransition(codepoint);
        Assert.NotEqual(Dfa<Codepoint>.Reject, nextStateId);

        if (nextStateId == Dfa<Codepoint>.Accept)
        {
            Assert.Equal(symbolB, dfa.SymbolStates[state.Id]);
            accepted = true;
            break;
        }

        state = dfa.GetState(nextStateId);
        Assert.NotNull(state);
    }

    Assert.True(accepted, "DFA did not reach Accept for the second alternative");
}