// Compiles `regex` into a byte-level DFA and drives it with `input`, asserting that the
// outcome (accept or reject) agrees with `match`.
//
// regex         - pattern text handed to RegexParser.Parse.
// caseSensitive - forwarded unchanged to ToInvariant when lowering the parsed expression.
// input         - bytes fed to the DFA one at a time; also decoded as UTF-8 for the diagnostic output.
// match         - true when the DFA is expected to reach Dfa<byte>.Accept,
//                 false when it is expected to reach Dfa<byte>.Reject.
public void MatchTestUtf8(string regex, bool caseSensitive, byte[] input, bool match)
{
    // Diagnostics only: echo the decoded text and the hex value of each UTF-16 code unit.
    var inputString = Encoding.UTF8.GetString(input);
    this.output.WriteLine(inputString);
    this.output.WriteLine(string.Join(" ", inputString.Select(c => ((int)c).ToString("x4"))));

    // Pipeline: parse -> invariant (byte-based) expression -> NFA -> DFA.
    var expression = RegexParser.Parse(regex, 0);
    var mapper = new UnicodeUtf8Mapper(false, false);
    var rxCodepoint = expression.ToInvariant(mapper, new UnicodeCharSetProvider(), caseSensitive);
    var nfa = NfaBuilder<byte>.Build(rxCodepoint.Optimize());
    var dfa = DfaBuilder<byte>.Build(nfa, Utf8Bytes.EOF);

    var state = dfa.StartState;

    // Tracks whether the walk ended in Accept or Reject. Without it, a DFA that runs out of
    // input while still in an ordinary state would let the test pass without checking `match`.
    var verdictReached = false;

    // The EOF marker is appended so the DFA sees the same end-of-input symbol it was built with.
    foreach (var inputByte in input.Append(Utf8Bytes.EOF))
    {
        var nextStateId = state.GetTransition(inputByte);

        if (nextStateId == Dfa<byte>.Reject)
        {
            verdictReached = true;
            Assert.False(match, "Regex has not matched input, but expected a match");
            break;
        }

        if (nextStateId == Dfa<byte>.Accept)
        {
            verdictReached = true;
            Assert.True(match, "Regex has matched input, but expected a non-match");
            break;
        }

        state = dfa.GetState(nextStateId);
        Assert.NotNull(state);
    }

    Assert.True(verdictReached, "DFA consumed all input including EOF without reaching Accept or Reject");
}
// Builds the NFA and the DFA for `regex` and writes a diagram of each to the test output:
// the NFA via NfaFactoryTest.WriteDiagram, then the DFA via WriteDiagram.
//
// regex         - pattern text handed to RegexParser.Parse.
// caseSensitive - forwarded unchanged to ToInvariant when lowering the parsed expression.
public void CreateUtf8Diagram(string regex, bool caseSensitive)
{
    var parsed = RegexParser.Parse(regex, 0);
    var utf8Mapper = new UnicodeUtf8Mapper(false, false);
    var charSets = new UnicodeCharSetProvider();

    var invariant = parsed.ToInvariant(utf8Mapper, charSets, caseSensitive);
    var optimized = invariant.Optimize();

    var byteNfa = NfaBuilder<byte>.Build(optimized);

    // NOTE(review): 0xFF is passed where MatchTestUtf8 passes Utf8Bytes.EOF — presumably the
    // same end-of-input marker; confirm against Utf8Bytes.
    var byteDfa = DfaBuilder<byte>.Build(byteNfa, 0xFF);

    NfaFactoryTest.WriteDiagram(this.output, byteNfa);
    WriteDiagram(this.output, byteDfa);
}