/// <summary>
/// Exercises <c>StringMatcher</c> with a DFA built from two regex patterns:
/// consecutive <c>FindNext</c> calls, the reported match text and positions,
/// and searching again after the positions were overridden with <c>SetPositions</c>.
/// </summary>
public void TestStringMatcher()
{
    const string input = "bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa";

    var dfaBuilder = new DfaBuilder<int>();
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*b"), 1);
    dfaBuilder.AddPattern(Pattern.Regex("a[ab]*c"), 2);
    DfaState<int> dfa = dfaBuilder.Build(null);

    var matcher = new StringMatcher<int>(input);

    // First search: expected to report pattern 2 along with its text and positions.
    var matched = matcher.FindNext(dfa, out var accepted);
    Assert.True(matched);
    Assert.Equal(2, accepted);
    Assert.Equal("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", matcher.LastMatch);
    Assert.Equal(5, matcher.LastMatchStart);
    Assert.Equal(34, matcher.LastMatchEnd);

    // Second search: expected to report pattern 1.
    matched = matcher.FindNext(dfa, out accepted);
    Assert.True(matched);
    Assert.Equal(1, accepted);
    Assert.Equal("aaaaaaabbb", matcher.LastMatch);

    // Third search: nothing left to find, and the result is expected to be 0.
    matched = matcher.FindNext(dfa, out accepted);
    Assert.False(matched);
    Assert.Equal(0, accepted);

    // Override the matcher's positions, then search again from there.
    matcher.SetPositions(15, 20, 33);
    Assert.Equal("aaaaa", matcher.LastMatch);

    matcher.FindNext(dfa, out accepted);
    Assert.Equal("aaaaaaaaabbbb", matcher.LastMatch);

    matched = matcher.FindNext(dfa, out accepted);
    Assert.False(matched);
}
/// <summary>
/// Builds a DFA whose patterns carry <c>int</c> results (including 0) and
/// checks it via <c>CheckDfa</c> against "StructTests-2.txt".
/// </summary>
public void DfaWithIntResultsTest()
{
    var dfaBuilder = new DfaBuilder<int>();
    dfaBuilder.AddPattern(Pattern.Regex("ab"), 0);
    dfaBuilder.AddPattern(Pattern.Regex("bb"), 1);

    CheckDfa(dfaBuilder.Build(null), "StructTests-2.txt", false);
}
/// <summary>
/// Builds a DFA whose patterns carry <c>EnumToken</c> results, resolving
/// ambiguous accepts by taking the first candidate, and checks it via
/// <c>CheckDfa</c> against "StructTests-1.txt".
/// </summary>
public void DfaWithEnumResultsTest()
{
    var dfaBuilder = new DfaBuilder<EnumToken>();
    dfaBuilder.AddPattern(Pattern.Match("if"), EnumToken.If);
    dfaBuilder.AddPattern(Pattern.Regex("([A-Za-z])([A-Za-z0-9])*"), EnumToken.Id);

    // Both patterns are registered; when several results are offered, pick the first.
    var startState = dfaBuilder.Build(candidates => candidates.First());

    CheckDfa(startState, "StructTests-1.txt", false);
}
/// <summary>
/// Builds a DFA for the "if" keyword and an identifier pattern, with string
/// results, and passes the start state to <c>PrintDot</c>.
/// </summary>
public void Test()
{
    var keywordPattern = Pattern.Regex("if");
    var identifierPattern = Pattern.Regex("[a-zA-Z][a-zA-Z0-9]*");

    var dfaBuilder = new DfaBuilder<string>();
    dfaBuilder.AddPattern(keywordPattern, "if");
    dfaBuilder.AddPattern(identifierPattern, "id");

    // Build for both results; ambiguous accepts resolve to the first candidate.
    var language = new HashSet<string> { "if", "id" };
    var startState = dfaBuilder.Build(language, candidates => candidates.First());

    PrintDot(startState);
}
/// <summary>
/// Counts how many times a fresh <c>StringMatcher</c> can search
/// <paramref name="src"/> for <paramref name="patString"/> during the
/// measurement window that follows the <c>SpinUp</c> warm-up period.
/// </summary>
/// <param name="src">Text to search; it must not contain a match.</param>
/// <param name="patString">Regex source used to build the DFA.</param>
/// <returns>Number of searches started at or after <c>SpinUp</c> milliseconds.</returns>
/// <exception cref="Exception">Thrown when a match is unexpectedly found.</exception>
private int TimeMatcher(string src, string patString)
{
    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(Pattern.Regex(patString), true);
    DfaState<bool> startState = dfaBuilder.Build(null);

    var iterations = 0;
    var timer = Stopwatch.StartNew();

    // Run for SpinUp + 1000 ms in total; only iterations that began after the
    // warm-up period are counted.
    var elapsed = timer.ElapsedMilliseconds;
    while (elapsed < SpinUp + 1000)
    {
        var matcher = new StringMatcher<bool>(src);
        if (matcher.FindNext(startState, out _))
        {
            throw new Exception("not supposed to find a match");
        }

        if (elapsed >= SpinUp)
        {
            ++iterations;
        }

        elapsed = timer.ElapsedMilliseconds;
    }

    return iterations;
}
/// <summary>
/// Builds the DFA for the regex held in the <c>Pattern</c> member, accepting with <c>true</c>.
/// </summary>
/// <returns>The start state produced by the builder.</returns>
private static DfaState<bool> CreateMatcher()
{
    // The library type is fully qualified because "Pattern" here names the regex text.
    var compiled = DfaLex.Pattern.Regex(Pattern);

    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(compiled, true);
    return dfaBuilder.Build(null);
}
/// <summary>
/// Adds every decimal number below 100000 as a pattern whose result is the
/// number mod 7, builds the DFA, logs the build time, and verifies that whole-string
/// matching returns the expected result for each number and rejects strings
/// that were never added. The built DFA is expected to have 36 states.
/// </summary>
public void TestTo100K()
{
    // Single source of truth for the number of patterns (loops and log message).
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        builder.AddPattern(Pattern.Match(i.ToString()), i % 7);
    }

    // The timed region covers both the build and the state count.
    var stopWatch = new Stopwatch();
    stopWatch.Start();
    var start = builder.Build(null);
    var numstates = CountStates(start);
    stopWatch.Stop();
    var telapsed = stopWatch.ElapsedMilliseconds;

    helper.WriteLine($"Minimized {numberCount} numbers -> value mod 7 (down to {numstates} states) in {telapsed * .001} seconds");

    // Strings that were never added must not match.
    Assert.False(StringMatcher<int>.MatchWholeString(start, "", out _));
    Assert.False(StringMatcher<int>.MatchWholeString(start, (numberCount + 1).ToString(), out _));

    // Every added number must match and yield its value mod 7.
    for (var i = 0; i < numberCount; ++i)
    {
        Assert.True(StringMatcher<int>.MatchWholeString(start, i.ToString(), out var result));
        Assert.Equal(i % 7, result);
    }

    Assert.Equal(36, numstates);
}
/// <summary>
/// Composes a pattern for whole numbers divisible by 3, builds its DFA,
/// and checks that it has 3 states and matches "By3Test.out.txt".
/// </summary>
public void Test()
{
    // Make a pattern for whole numbers divisible by 3.
    // Building blocks are grouped by digit value mod 3.
    var rem0 = CharRange.AnyOf("0369");
    var rem1 = Pattern.Match(CharRange.AnyOf("147")).ThenMaybeRepeat(rem0);
    var rem2 = Pattern.Match(CharRange.AnyOf("258")).ThenMaybeRepeat(rem0);

    var plusTwo = Pattern
        .MaybeRepeat(rem1.Then(rem2))
        .Then(Pattern.AnyOf(rem1.Then(rem1), rem2));
    var minusTwo = Pattern
        .MaybeRepeat(rem2.Then(rem1))
        .Then(Pattern.AnyOf(rem2.Then(rem2), rem1));

    var divisibleBy3 = Pattern.MaybeRepeat(
        Pattern.AnyOf(rem0, rem1.Then(rem2), plusTwo.Then(minusTwo)));

    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(divisibleBy3, true);
    var startState = dfaBuilder.Build(new HashSet<bool> { true }, null);

    Assert.Equal(3, CountStates(startState));
    CheckDfa(startState, "By3Test.out.txt", false);
}
/// <summary>
/// Builds a DFA that accepts <paramref name="p"/> with result <c>true</c>
/// and returns its <c>PrettyPrinter</c> rendering.
/// </summary>
/// <param name="p">The matchable to compile.</param>
/// <returns>The text produced by <c>PrettyPrinter.Print</c> for the built DFA.</returns>
private string PToString(IMatchable p)
{
    var dfaBuilder = new DfaBuilder<bool>();
    dfaBuilder.AddPattern(p, true);

    var startState = dfaBuilder.Build(null);
    var rendered = PrettyPrinter.Print(startState);
    return rendered;
}
/// <summary>
/// Verifies <c>BuildReverseFinder</c>: its output must render the same as a
/// DFA built by hand from "any string, then the reversed token pattern" for every
/// <c>JavaToken</c>, even when some token patterns are made optional.
/// </summary>
public void Test()
{
    var allTokens = Enum.GetValues(typeof(JavaToken));

    // Expected DFA: all strings followed by each reversed token pattern.
    var expectedBuilder = new DfaBuilder<bool>();
    foreach (JavaToken token in allTokens)
    {
        var reversedFinder = Pattern.AllStrings.Then(token.Pattern().Reversed);
        expectedBuilder.AddPattern(reversedFinder, true);
    }

    var want = _toString(expectedBuilder.Build(null));

    // Actual DFA: let the builder derive the reverse finder itself.
    var builder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in allTokens)
    {
        builder.AddPattern(token.Pattern(), token);
    }

    var have = _toString(builder.BuildReverseFinder());
    Assert.Equal(want, have);

    // Make sure we properly exclude the empty string from the reverse finder DFA:
    // tokens with an odd numeric value get an optional pattern.
    builder.Clear();
    foreach (JavaToken token in allTokens)
    {
        var isOdd = ((int)token & 1) != 0;
        if (isOdd)
        {
            builder.AddPattern(Pattern.Maybe(token.Pattern()), token);
        }
        else
        {
            builder.AddPattern(token.Pattern(), token);
        }
    }

    have = _toString(builder.BuildReverseFinder());
    Assert.Equal(want, have);
}
/// <summary>
/// Adds every <c>JavaToken</c> pattern to <paramref name="builder"/>, builds
/// the DFA for the full token set, and checks it against "JavaTest.out.txt".
/// </summary>
/// <param name="builder">The builder that receives the patterns and produces the DFA.</param>
private void Build(DfaBuilder<JavaToken> builder)
{
    var allTokens = Enum.GetValues(typeof(JavaToken));

    foreach (JavaToken token in allTokens)
    {
        builder.AddPattern(token.Pattern(), token);
    }

    var language = new HashSet<JavaToken>(allTokens.Cast<JavaToken>());
    var startState = builder.Build(language, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
/// <summary>
/// Checks that a string searcher finds and replaces text containing a
/// combining accent character (U+0301) written as a separate code unit.
/// </summary>
public void TestSeparateAccentedCharacters()
{
    var dfaBuilder = new DfaBuilder<AccentedChar>();
    dfaBuilder.AddPattern(Pattern.Match("Les Mise\u0301rables"), AccentedChar.Miserable);
    var searcher = dfaBuilder.BuildStringSearcher(null);

    var input = "Les Mise\u0301rables";
    var expected = "[Miserable=Les Mise\u0301rables]";

    var actual = searcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Builds a DFA from every <c>JavaToken</c> pattern for the full token set
/// and checks it against "JavaTest.out.txt".
/// </summary>
public void Test()
{
    var allTokens = Enum.GetValues(typeof(JavaToken));

    var dfaBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in allTokens)
    {
        dfaBuilder.AddPattern(token.Pattern(), token);
    }

    var language = new HashSet<JavaToken>(allTokens.Cast<JavaToken>());
    var startState = dfaBuilder.Build(language, null);

    CheckDfa(startState, "JavaTest.out.txt", false);
}
/// <summary>
/// Checks that a searcher built from all <c>JavaToken</c> patterns replaces
/// an input consisting of a single token ("this") with "[THIS=this]".
/// </summary>
public void TestSingleToken()
{
    var dfaBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        dfaBuilder.AddPattern(token.Pattern(), token);
    }

    var searcher = dfaBuilder.BuildStringSearcher(null);

    var input = "this";
    var expected = "[THIS=this]";

    var actual = searcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Runs find-and-replace with a searcher built from all <c>JavaToken</c>
/// patterns over "SearcherTestInput.txt" and compares the result with
/// "SearcherTestOutput.txt".
/// </summary>
public void Test()
{
    var dfaBuilder = new DfaBuilder<JavaToken>();
    foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
    {
        dfaBuilder.AddPattern(token.Pattern(), token);
    }

    var searcher = dfaBuilder.BuildStringSearcher(null);

    var input = ReadResource("SearcherTestInput.txt");
    var expected = ReadResource("SearcherTestOutput.txt");

    var actual = searcher.FindAndReplace(input, TokenReplace);

    Assert.Equal(expected, actual);
}
/// <summary>
/// Cross-checks <c>DfaAuxiliaryInformation.GetDestinies</c> against a
/// brute-force computation. For every state, the set of results reachable from it
/// is computed by iterating to a fixpoint; the expected destiny is the single
/// member of that set when it has exactly one element, and <c>null</c> otherwise.
/// </summary>
public void Test()
{
    var builder = new DfaBuilder<JavaToken?>(null);
    foreach (JavaToken tok in Enum.GetValues(typeof(JavaToken)))
    {
        builder.AddPattern(tok.Pattern(), tok);
    }

    var language = new HashSet<JavaToken?>(Enum.GetValues(typeof(JavaToken)).Cast<JavaToken?>());
    var start = builder.Build(language, null);
    var auxInfo = new DfaAuxiliaryInformation<JavaToken?>(new[] { start });

    // Calculate destinies the slow way: seed each accepting state with its own match.
    var states = auxInfo.GetStatesByNumber();
    var numStates = states.Count;
    var slowDestinies = new List<ISet<JavaToken?>>(numStates);
    for (var i = 0; i < numStates; i++)
    {
        slowDestinies.Add(new HashSet<JavaToken?>());
        var state = states[i];
        if (state.IsAccepting)
        {
            slowDestinies[i].Add(state.Match);
        }
    }

    // Propagate results backwards along transitions until nothing changes.
    // Another pass is needed whenever ANY set grew, including the case where only
    // some of the target's tokens were new; otherwise the loop could stop before
    // the fixpoint is reached.
    var again = true;
    while (again)
    {
        again = false;
        for (var i = 0; i < numStates; ++i)
        {
            var set = slowDestinies[i];
            states[i].EnumerateTransitions((f, l, target) =>
            {
                var countBefore = set.Count;
                set.UnionWith(slowDestinies[target.StateNumber]);
                if (set.Count != countBefore)
                {
                    again = true;
                }
            });
        }
    }

    var destinies = auxInfo.GetDestinies();
    for (var i = 0; i < numStates; ++i)
    {
        var set = slowDestinies[i];

        // A state has a destiny only when exactly one result is reachable from it.
        JavaToken? wantDestiny = null;
        if (set.Count == 1)
        {
            wantDestiny = set.FirstOrDefault();
        }

        Assert.Equal(wantDestiny, destinies[i]);
    }
}
/// <summary>
/// Builds two DFAs at once from one builder: patterns are the decimal numbers
/// below 100000 that are divisible by 3 (result 1), by 7 (result 2), or by 21
/// (result 3). The first language is {1, 3} and the second is {2, 3}. The test
/// logs the build time, verifies whole-string matching against both start states,
/// and expects 137 states in total.
/// </summary>
public void TestSimultaneousLanguages()
{
    // Single source of truth for the number range (loops and log message).
    const int numberCount = 100000;

    var builder = new DfaBuilder<int>();
    for (var i = 0; i < numberCount; ++i)
    {
        // Check 21 first: such numbers are divisible by both 3 and 7.
        if (i % 21 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 3);
        }
        else if (i % 3 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 1);
        }
        else if (i % 7 == 0)
        {
            builder.AddPattern(Pattern.Match(i.ToString()), 2);
        }
    }

    var langs = new List<ISet<int>>
    {
        new HashSet<int> { 1, 3 },
        new HashSet<int> { 2, 3 },
    };

    // The timed region covers both the build and the state count.
    var stopWatch = new Stopwatch();
    stopWatch.Start();
    var starts = builder.Build(langs, null);
    var start3 = starts[0];
    var start7 = starts[1];
    var numstates = CountStates(start3, start7);
    stopWatch.Stop();
    var telapsed = stopWatch.ElapsedMilliseconds;

    helper.WriteLine($"Minimized {numberCount} numbers -> divisible by 7 and 3 (down to {numstates} states) in {telapsed * .001} seconds");

    for (var i = 0; i < numberCount; ++i)
    {
        var text = i.ToString();
        if (i % 21 == 0)
        {
            // Result 3 belongs to both languages.
            Assert.True(StringMatcher<int>.MatchWholeString(start3, text, out var result));
            Assert.Equal(3, result);
            Assert.True(StringMatcher<int>.MatchWholeString(start7, text, out result));
            Assert.Equal(3, result);
        }
        else if (i % 3 == 0)
        {
            Assert.True(StringMatcher<int>.MatchWholeString(start3, text, out var result));
            Assert.Equal(1, result);
            Assert.False(StringMatcher<int>.MatchWholeString(start7, text, out _));
        }
        else if (i % 7 == 0)
        {
            Assert.False(StringMatcher<int>.MatchWholeString(start3, text, out _));
            Assert.True(StringMatcher<int>.MatchWholeString(start7, text, out var result));
            Assert.Equal(2, result);
        }
    }

    Assert.Equal(137, numstates);
}