// Drives a StringMatcher over a fixed input with a DFA built from two regexes
// ("a[ab]*b" -> 1, "a[ab]*c" -> 2) and checks the reported matches, their
// positions, and the behaviour after the positions are reset via SetPositions.
public void TestStringMatcher()
{
    var dfaBuilder = new DfaBuilder<int>();
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*b"), 1);
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*c"), 2);
    DfaState<int> startState = dfaBuilder.Build(null);

    var scanner = new StringMatcher<int>("bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa");

    // First search: expects result 2 and the span [5, 34).
    Assert.True(scanner.FindNext(startState, out var token));
    Assert.Equal(2, token);
    Assert.Equal("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", scanner.LastMatch);
    Assert.Equal(5, scanner.LastMatchStart);
    Assert.Equal(34, scanner.LastMatchEnd);

    // Second search: expects result 1.
    Assert.True(scanner.FindNext(startState, out token));
    Assert.Equal(1, token);
    Assert.Equal("aaaaaaabbb", scanner.LastMatch);

    // Third search: nothing left to find, and the out value is 0.
    Assert.False(scanner.FindNext(startState, out token));
    Assert.Equal(0, token);

    // Reset the matcher's positions by hand and search again from there.
    scanner.SetPositions(15, 20, 33);
    Assert.Equal("aaaaa", scanner.LastMatch);

    scanner.FindNext(startState, out token);
    Assert.Equal("aaaaaaaaabbbb", scanner.LastMatch);

    Assert.False(scanner.FindNext(startState, out token));
}
// Builds a DFA that maps each decimal number in [0, 100000) to its value mod 7,
// logs how long the build plus state count took, and verifies every number is
// matched with the right result. The DFA is expected to have 36 states.
public void TestTo100K()
{
    // Single source of truth for the range; the original repeated the literal.
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        builder.AddPattern(Pattern.Match(i.ToString()), i % 7);
    }

    // The timed section covers both the build and the state count.
    var stopWatch = new Stopwatch();
    stopWatch.Start();
    var start = builder.Build(null);
    var numstates = CountStates(start);
    stopWatch.Stop();
    var telapsed = stopWatch.ElapsedMilliseconds;

    // Fixed typo in the log message ("Mininmized" -> "Minimized").
    helper.WriteLine($"Minimized 100000 numbers -> value mod 7 (down to {numstates} states) in {telapsed * .001} seconds");

    // Inputs outside the pattern set must not match as whole strings.
    Assert.False(StringMatcher<int>.MatchWholeString(start, "", out _));
    Assert.False(StringMatcher<int>.MatchWholeString(start, "100001", out _));

    for (var i = 0; i < numberCount; ++i)
    {
        Assert.True(StringMatcher<int>.MatchWholeString(start, i.ToString(), out var result));
        Assert.Equal(i % 7, result);
    }

    Assert.Equal(36, numstates);
}
// Builds a DFA start state that yields `true` for the regex held in the
// enclosing class's `Pattern` member.
private static DfaState<bool> CreateMatcher()
{
    var dfaBuilder = new DfaBuilder<bool>();
    var compiledRegex = DfaLex.Pattern.Regex(Pattern);
    dfaBuilder.AddPattern(compiledRegex, true);
    return dfaBuilder.Build(null);
}
// Measures how many searches of `src` for the regex `patString` start within
// the 1000 ms window that follows the first `SpinUp` milliseconds.
//
// src:       the text to scan; it must NOT contain a match for the pattern.
// patString: the regex compiled into the DFA used for scanning.
// returns:   the number of iterations that began at or after `SpinUp` ms.
// throws:    InvalidOperationException if a match is unexpectedly found.
private int TimeMatcher(string src, string patString)
{
    DfaState<bool> startState;
    {
        var builder = new DfaBuilder<bool>();
        builder.AddPattern(Pattern.Regex(patString), true);
        startState = builder.Build(null);
    }

    var count = 0;
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    // `t` is sampled once per iteration; iterations before SpinUp are warm-up
    // and are executed but not counted.
    for (var t = stopWatch.ElapsedMilliseconds; t < SpinUp + 1000; t = stopWatch.ElapsedMilliseconds)
    {
        var m = new StringMatcher<bool>(src);
        if (m.FindNext(startState, out _))
        {
            // A specific exception type instead of the bare System.Exception;
            // it still derives from Exception, so broad handlers keep working.
            throw new InvalidOperationException("not supposed to find a match");
        }

        if (t >= SpinUp)
        {
            ++count;
        }
    }

    return count;
}
// Builds a pattern for whole numbers divisible by 3 out of digit classes
// grouped by their value mod 3, then checks that the resulting DFA has exactly
// 3 states and matches the stored reference output.
public void Test()
{
    // Digits mod 3: class 0, and classes 1 and 2 each followed by any run of class-0 digits.
    var zeroDigits = CharRange.AnyOf("0369");
    var oneMod3 = Pattern.Match(CharRange.AnyOf("147")).ThenMaybeRepeat(zeroDigits);
    var twoMod3 = Pattern.Match(CharRange.AnyOf("258")).ThenMaybeRepeat(zeroDigits);

    // Sequence that contributes +2 to the running sum (mod 3).
    var plus2Loop = Pattern.MaybeRepeat(oneMod3.Then(twoMod3));
    var plus2Tail = Pattern.AnyOf(oneMod3.Then(oneMod3), twoMod3);
    var plus2 = plus2Loop.Then(plus2Tail);

    // Sequence that contributes -2 to the running sum (mod 3).
    var minus2Loop = Pattern.MaybeRepeat(twoMod3.Then(oneMod3));
    var minus2Tail = Pattern.AnyOf(twoMod3.Then(twoMod3), oneMod3);
    var minus2 = minus2Loop.Then(minus2Tail);

    var divisibleBy3 = Pattern.MaybeRepeat(
        Pattern.AnyOf(zeroDigits, oneMod3.Then(twoMod3), plus2.Then(minus2)));

    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(divisibleBy3, true);
    var startState = dfaBuilder.Build(new HashSet<bool> { true }, null);

    Assert.Equal(3, CountStates(startState));
    CheckDfa(startState, "By3Test.out.txt", false);
}
// Computes the regular expression for this instance, converts it to an NFA and
// returns the DFA built from that NFA using this instance's `eof` letter.
public Dfa<TLetter> ComputeDfa()
{
    var letterNfa = NfaBuilder<TLetter>.Build(this.ComputeRx());
    return DfaBuilder<TLetter>.Build(letterNfa, this.eof, true);
}
// Builds a single-pattern DFA for `p` (accepting with `true`) and returns its
// PrettyPrinter rendering.
private string PToString(IMatchable p)
{
    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(p, true);
    var startState = dfaBuilder.Build(null);
    return PrettyPrinter.Print(startState);
}
// Computes the regular expression for this instance, builds an alphabet over it
// (returned through `alphabet`), and returns the DFA over LetterId built from
// the alphabet's expression.
//
// alphabet:    receives the AlphabetBuilder created from the regex, this
//              instance's `eof` letter and `validRanges`.
// validRanges: forwarded unchanged to the AlphabetBuilder constructor.
public Dfa<LetterId> ComputeDfa(out AlphabetBuilder<TLetter> alphabet, RangeSet<TLetter> validRanges = default)
{
    alphabet = new AlphabetBuilder<TLetter>(this.ComputeRx(), this.eof, validRanges);
    var letterNfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
    return DfaBuilder<LetterId>.Build(letterNfa, LetterId.Eof, true);
}
// Builds a DFA whose results are EnumToken values (the literal "if" and an
// identifier regex), resolving ambiguous accepts by taking the first candidate,
// and compares it with the stored reference output.
public void DfaWithEnumResultsTest()
{
    var dfaBuilder = new DfaBuilder<EnumToken>();
    var keyword = Pattern.Match("if");
    dfaBuilder.AddPattern(keyword, EnumToken.If);
    var identifier = Pattern.Regex("([A-Za-z])([A-Za-z0-9])*");
    dfaBuilder.AddPattern(identifier, EnumToken.Id);

    var startState = dfaBuilder.Build(candidates => candidates.First());

    CheckDfa(startState, "StructTests-1.txt", false);
}
// Builds a DFA whose results are plain ints ("ab" -> 0, "bb" -> 1) and compares
// it with the stored reference output.
public void DfaWithIntResultsTest()
{
    var dfaBuilder = new DfaBuilder<int>();
    var abPattern = Pattern.Regex("ab");
    dfaBuilder.AddPattern(abPattern, 0);
    var bbPattern = Pattern.Regex("bb");
    dfaBuilder.AddPattern(bbPattern, 1);

    var startState = dfaBuilder.Build(null);

    CheckDfa(startState, "StructTests-2.txt", false);
}
// Sets up the two pieces this grammar needs: a compiled lexer state machine
// (`stateMachine`) recognising whitespace, charsets and the operator/paren
// characters, and the LALR table (`table`) computed from the productions below.
private CharsetGrammar()
{
    var charSets = new UnicodeCharSetProvider();
    var utf16Mapper = new UnicodeUtf16Mapper(false, false);
    var lexerRx = RegexLexer.CreateRx(utf16Mapper);

    // Token expressions lifted out of the regex lexer's own expression.
    var whitespaceToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymWhitespace), SymWhitespace, 0);
    var charsetToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymCharset), SymCharset, 0);
    var regexCharsetToken = new RxAccept<char>(RxOfSymbol<char>.Extract(lexerRx, RegexLexer.SymRegexCharset), SymRegexCharset, 0);

    // Single-character operator and parenthesis tokens.
    var unionToken = new RxAccept<char>(RegexMatchSet.FromChars('|', '+').ToInvariant(utf16Mapper, charSets, true), SymUnion, 0);
    var subtractToken = new RxAccept<char>(RegexMatchSet.FromChars('-').ToInvariant(utf16Mapper, charSets, true), SymSubtract, 0);
    var intersectToken = new RxAccept<char>(RegexMatchSet.FromChars('&').ToInvariant(utf16Mapper, charSets, true), SymIntersect, 0);
    var differenceToken = new RxAccept<char>(RegexMatchSet.FromChars('^').ToInvariant(utf16Mapper, charSets, true), SymDifference, 0);
    var negateToken = new RxAccept<char>(RegexMatchSet.FromChars('~').ToInvariant(utf16Mapper, charSets, true), SymNegate, 0);
    var parensOpenToken = new RxAccept<char>(RegexMatchSet.FromChars('(').ToInvariant(utf16Mapper, charSets, true), SymParensOpen, 0);
    var parensCloseToken = new RxAccept<char>(RegexMatchSet.FromChars(')').ToInvariant(utf16Mapper, charSets, true), SymParensClose, 0);

    // Right-nested alternation of all tokens, assembled innermost pair first so
    // the node structure and construction order match a single nested expression.
    var anyToken = new RxAlternation<char>(parensOpenToken, parensCloseToken);
    anyToken = new RxAlternation<char>(negateToken, anyToken);
    anyToken = new RxAlternation<char>(differenceToken, anyToken);
    anyToken = new RxAlternation<char>(intersectToken, anyToken);
    anyToken = new RxAlternation<char>(subtractToken, anyToken);
    anyToken = new RxAlternation<char>(unionToken, anyToken);
    anyToken = new RxAlternation<char>(regexCharsetToken, anyToken);
    anyToken = new RxAlternation<char>(charsetToken, anyToken);
    anyToken = new RxAlternation<char>(whitespaceToken, anyToken);

    var alphabet = new AlphabetBuilder<char>(anyToken, Utf16Chars.EOF, Utf16Chars.ValidBmp);
    var lexerNfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
    var lexerDfa = DfaBuilder<LetterId>.Build(lexerNfa, LetterId.Eof);

    // The start state is required to carry the default id; anything else is an internal error.
    if (lexerDfa.StartState.Id != default(Id<DfaState<LetterId>>))
    {
        throw new InvalidOperationException($"Internal error: Unexpected DFA start state {lexerDfa.StartState.Id}");
    }

    var alphabetMapper = AlphabetMapperEmitter<char>.CreateExpression(alphabet);
    this.stateMachine = DfaStateMachineEmitter.CreateExpression(lexerDfa, alphabetMapper).Compile();

    // NOTE(review): each entry presumably lists a production's left-hand symbol
    // followed by its right-hand symbols — confirm against GrammarBuilder.Add.
    var grammar = new GrammarBuilder(-2, -1, SymExpression)
    {
        { SymUnionExpression, SymExpression, SymUnion, SymNegateExpression },
        { SymExpression, SymUnionExpression },
        { SymSubtractExpression, SymExpression, SymSubtract, SymNegateExpression },
        { SymExpression, SymSubtractExpression },
        { SymIntersectExpression, SymExpression, SymIntersect, SymNegateExpression },
        { SymExpression, SymIntersectExpression },
        { SymDifferenceExpression, SymExpression, SymDifference, SymNegateExpression },
        { SymExpression, SymDifferenceExpression },
        { SymExpression, SymNegateExpression },
        { SymNegateExpression, SymNegate, SymValueExpression },
        { SymNegateExpression, SymValueExpression },
        { SymValueExpression, SymParensOpen, SymExpression, SymParensClose },
        { SymValueExpression, SymCharset },
        { SymValueExpression, SymRegexCharset }
    };
    this.table = new LalrTableGenerator(grammar).ComputeTable();
}
// Builds a two-token DFA ("if" keyword vs. identifier), resolving ambiguous
// accepts by taking the first candidate, and prints it via PrintDot.
public void Test()
{
    var keywordPattern = Pattern.Regex("if");
    var identifierPattern = Pattern.Regex("[a-zA-Z][a-zA-Z0-9]*");

    var dfaBuilder = new DfaBuilder<string>();
    dfaBuilder.AddPattern(keywordPattern, "if");
    dfaBuilder.AddPattern(identifierPattern, "id");

    var language = new HashSet<string> { "if", "id" };
    var startState = dfaBuilder.Build(language, candidates => candidates.First());

    PrintDot(startState);
}
// Registers the pattern of every JavaToken value with `builder`, builds the DFA
// for the language made of all tokens, and checks it against the stored
// reference output.
private void Build(DfaBuilder<JavaToken> builder)
{
    var tokenValues = Enum.GetValues(typeof(JavaToken));
    foreach (JavaToken token in tokenValues)
    {
        builder.AddPattern(token.Pattern(), token);
    }

    var language = new HashSet<JavaToken>(tokenValues.Cast<JavaToken>());
    var startState = builder.Build(language, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
// Builds a DFA from the patterns of all JavaToken values and checks it against
// the stored reference output.
public void Test()
{
    var dfaBuilder = new DfaBuilder<JavaToken>();
    var tokenValues = Enum.GetValues(typeof(JavaToken));
    foreach (JavaToken token in tokenValues)
    {
        dfaBuilder.AddPattern(token.Pattern(), token);
    }

    var everyToken = new HashSet<JavaToken>(tokenValues.Cast<JavaToken>());
    var startState = dfaBuilder.Build(everyToken, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
// Checks BuildReverseFinder against a hand-built reference: a DFA over
// "any string followed by the reversed token pattern" for every JavaToken.
// The comparison is done on the `_toString` rendering of both DFAs.
public void Test()
{
    // Reference DFA, built explicitly from the reversed patterns.
    var referenceBuilder = new DfaBuilder<bool>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        var reversedSuffix = token.Pattern().Reversed;
        referenceBuilder.AddPattern(Pattern.AllStrings.Then(reversedSuffix), true);
    }

    var expected = _toString(referenceBuilder.Build(null));

    // DFA under test, produced by the builder's reverse-finder support.
    var tokenBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        tokenBuilder.AddPattern(token.Pattern(), token);
    }

    var actual = _toString(tokenBuilder.BuildReverseFinder());
    Assert.Equal(expected, actual);

    // Make sure we properly exclude the empty string from the reverse finder
    // DFA: odd-valued tokens are made optional, and the result must not change.
    tokenBuilder.Clear();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        var isOddValue = ((int)token & 1) != 0;
        if (isOddValue)
        {
            tokenBuilder.AddPattern(Pattern.Maybe(token.Pattern()), token);
        }
        else
        {
            tokenBuilder.AddPattern(token.Pattern(), token);
        }
    }

    actual = _toString(tokenBuilder.BuildReverseFinder());
    Assert.Equal(expected, actual);
}
// Builds two DFAs at once from one pattern set over the numbers in [0, 100000):
// multiples of 21 accept with 3, other multiples of 3 with 1, other multiples
// of 7 with 2. Language {1, 3} is therefore "divisible by 3" and {2, 3} is
// "divisible by 7". Verifies both DFAs on every number and expects 137 states
// in total.
public void TestSimultaneousLanguages()
{
    // Single source of truth for the range; the original repeated the literal.
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        if (i % 21 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 3);
        }
        else if (i % 3 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 1);
        }
        else if (i % 7 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 2);
        }
    }

    // One accept set per language, in the order the start states are returned.
    var langs = new List<ISet<int>>
    {
        new HashSet<int> { 1, 3 },
        new HashSet<int> { 2, 3 },
    };

    // The timed section covers both the build and the state count.
    var stopWatch = new Stopwatch();
    stopWatch.Start();
    var starts = builder.Build(langs, null);
    var start3 = starts[0];
    var start7 = starts[1];
    var numstates = CountStates(start3, start7);
    stopWatch.Stop();
    var telapsed = stopWatch.ElapsedMilliseconds;

    // The message used to report 1000000 numbers; the loops cover 100000.
    helper.WriteLine($"Minimized 100000 numbers -> divisible by 7 and 3 (down to {numstates} states) in {telapsed * .001} seconds");

    for (var i = 0; i < numberCount; ++i)
    {
        if (i % 21 == 0)
        {
            Assert.True(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out var result));
            Assert.Equal(3, result);
            Assert.True(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out result));
            Assert.Equal(3, result);
        }
        else if (i % 3 == 0)
        {
            Assert.True(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out var result));
            Assert.Equal(1, result);
            Assert.False(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out _));
        }
        else if (i % 7 == 0)
        {
            Assert.False(StringMatcher<int>.MatchWholeString(start3, i.ToString(), out _));
            Assert.True(StringMatcher<int>.MatchWholeString(start7, i.ToString(), out var result));
            Assert.Equal(2, result);
        }
    }

    Assert.Equal(137, numstates);
}
// Cross-checks DfaAuxiliaryInformation.GetDestinies against a slow reference
// computation: for every state, collect the set of tokens reachable from it by
// iterating to a fixpoint; a state's expected destiny is the single token in
// that set, or null when the set does not contain exactly one token.
public void Test()
{
    var builder = new DfaBuilder<JavaToken?>(null);
    foreach (JavaToken tok in Enum.GetValues(typeof(JavaToken)))
    {
        builder.AddPattern(tok.Pattern(), tok);
    }

    var start = builder.Build(new HashSet<JavaToken?>(Enum.GetValues(typeof(JavaToken)).Cast<JavaToken?>()), null);
    var auxInfo = new DfaAuxiliaryInformation<JavaToken?>(new[] { start });

    // Calculate destinies the slow way: seed each accepting state with its own match.
    var states = auxInfo.GetStatesByNumber();
    var numStates = states.Count;
    var slowDestinies = new List<ISet<JavaToken?>>(numStates);
    for (var i = 0; i < numStates; i++)
    {
        slowDestinies.Add(new HashSet<JavaToken?>());
        var state = states[i];
        if (state.IsAccepting)
        {
            slowDestinies[i].Add(state.Match);
        }
    }

    // Propagate tokens backwards along transitions until nothing changes.
    var again = true;
    while (again)
    {
        again = false;
        for (var i = 0; i < numStates; ++i)
        {
            var set = slowDestinies[i];
            var state = states[i];
            state.EnumerateTransitions((f, l, target) =>
            {
                var targetSet = slowDestinies[target.StateNumber];

                // Another pass is needed iff at least one token was newly added.
                // The previous check required *every* token to be new (and was
                // vacuously true for an empty target set), so partial growth
                // could end the loop early and an empty target could spin forever.
                var changed = false;
                foreach (var token in targetSet)
                {
                    if (set.Add(token))
                    {
                        changed = true;
                    }
                }

                if (changed)
                {
                    again = true;
                }
            });
        }
    }

    var destinies = auxInfo.GetDestinies();
    for (var i = 0; i < numStates; ++i)
    {
        var set = slowDestinies[i];
        JavaToken? wantDestiny = null;
        if (set.Count == 1)
        {
            wantDestiny = set.FirstOrDefault();
        }

        // State i: the fast destiny must agree with the slow reference.
        Assert.Equal(wantDestiny, destinies[i]);
    }
}