// Walks a StringMatcher<int> through a fixed input using a DFA that reports
// 1 for "a[ab]*b" and 2 for "a[ab]*c", checking each successive match and
// then the behaviour after the matcher positions are reset by hand.
public void TestStringMatcher()
        {
            const string input = "bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa";

            var dfaBuilder = new DfaBuilder <int>();
            dfaBuilder.AddPattern(Pattern.Regex("a[ab]*b"), 1);
            dfaBuilder.AddPattern(Pattern.Regex("a[ab]*c"), 2);
            DfaState <int> dfa = dfaBuilder.Build(null);

            var matcher = new StringMatcher <int>(input);

            // First match: the run of a's and b's terminated by 'c'.
            Assert.True(matcher.FindNext(dfa, out var result));
            Assert.Equal(2, result);
            Assert.Equal("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", matcher.LastMatch);
            Assert.Equal(5, matcher.LastMatchStart);
            Assert.Equal(34, matcher.LastMatchEnd);

            // Second match: the "...b" pattern.
            Assert.True(matcher.FindNext(dfa, out result));
            Assert.Equal(1, result);
            Assert.Equal("aaaaaaabbb", matcher.LastMatch);

            // Nothing further matches; the out value is expected to be 0.
            Assert.False(matcher.FindNext(dfa, out result));
            Assert.Equal(0, result);

            // NOTE(review): the arguments look like (last match start, last match end,
            // search limit), since LastMatch becomes the 5 characters [15, 20) - confirm
            // against StringMatcher.SetPositions.
            matcher.SetPositions(15, 20, 33);
            Assert.Equal("aaaaa", matcher.LastMatch);

            // The return value of this search is deliberately not asserted (as in the
            // original); only the resulting LastMatch is checked.
            matcher.FindNext(dfa, out result);
            Assert.Equal("aaaaaaaaabbbb", matcher.LastMatch);

            var foundAgain = matcher.FindNext(dfa, out result);
            Assert.False(foundAgain);
        }
Exemple #2
0
        // Builds a DFA with int results (0 for "ab", 1 for "bb") and passes it to
        // CheckDfa together with the "StructTests-2.txt" resource name.
        public void DfaWithIntResultsTest()
        {
            // The index of each regex in this array is also its result value.
            var regexes    = new[] { "ab", "bb" };
            var dfaBuilder = new DfaBuilder <int>();

            for (var result = 0; result < regexes.Length; ++result)
            {
                dfaBuilder.AddPattern(Pattern.Regex(regexes[result]), result);
            }

            CheckDfa(dfaBuilder.Build(null), "StructTests-2.txt", false);
        }
Exemple #3
0
        // Builds a DFA with enum results: the literal "if" maps to EnumToken.If and
        // the identifier regex to EnumToken.Id. Ambiguities are resolved by taking
        // the first accepted result. The DFA is then passed to CheckDfa with the
        // "StructTests-1.txt" resource name.
        public void DfaWithEnumResultsTest()
        {
            var keyword    = Pattern.Match("if");
            var identifier = Pattern.Regex("([A-Za-z])([A-Za-z0-9])*");

            var dfaBuilder = new DfaBuilder <EnumToken>();
            dfaBuilder.AddPattern(keyword, EnumToken.If);
            dfaBuilder.AddPattern(identifier, EnumToken.Id);

            var startState = dfaBuilder.Build(candidates => candidates.First());

            CheckDfa(startState, "StructTests-1.txt", false);
        }
Exemple #4
0
        // Builds a DFA with string results ("if" keyword vs. "id" identifier),
        // resolving ambiguities by taking the first accepted result, and prints
        // it through PrintDot.
        public void Test()
        {
            var dfaBuilder = new DfaBuilder <string>();

            dfaBuilder.AddPattern(Pattern.Regex("if"), "if");
            dfaBuilder.AddPattern(Pattern.Regex("[a-zA-Z][a-zA-Z0-9]*"), "id");

            // The language to build for contains both result values.
            var language = new HashSet <string> { "if", "id" };
            var dfaStart = dfaBuilder.Build(language, candidates => candidates.First());

            PrintDot(dfaStart);
        }
Exemple #5
0
        // Measures how many times a fresh StringMatcher<bool> can search `src` for
        // `patString` without finding it.
        //
        // src:       the text to search; it must NOT contain a match.
        // patString: regex source passed to Pattern.Regex.
        // returns:   the number of completed searches whose start timestamp was at
        //            or after SpinUp milliseconds; the loop ends once the elapsed
        //            time reaches SpinUp + 1000 milliseconds.
        // throws:    InvalidOperationException when a match is unexpectedly found.
        private int TimeMatcher(string src, string patString)
        {
            var builder = new DfaBuilder <bool>();
            builder.AddPattern(Pattern.Regex(patString), true);
            DfaState <bool> startState = builder.Build(null);

            var count     = 0;
            var stopWatch = Stopwatch.StartNew();

            for (var t = stopWatch.ElapsedMilliseconds; t < SpinUp + 1000; t = stopWatch.ElapsedMilliseconds)
            {
                var m = new StringMatcher <bool>(src);
                if (m.FindNext(startState, out _))
                {
                    // A specific exception type instead of bare System.Exception;
                    // callers catching Exception are unaffected.
                    throw new InvalidOperationException("not supposed to find a match");
                }

                // Iterations that started during the spin-up window are not counted.
                if (t >= SpinUp)
                {
                    ++count;
                }
            }

            return count;
        }
Exemple #6
0
        // Builds a DFA that yields `true` for the regex held in the class-level
        // `Pattern` member and returns its start state. The pattern type is
        // namespace-qualified because `Pattern` also names that member here.
        private static DfaState <bool> CreateMatcher()
        {
            var dfaBuilder = new DfaBuilder <bool>();
            var regex      = DfaLex.Pattern.Regex(Pattern);

            dfaBuilder.AddPattern(regex, true);

            var startState = dfaBuilder.Build(null);
            return startState;
        }
Exemple #7
0
        // Adds every decimal number in [0, 100000) as a literal pattern whose result
        // is the number modulo 7, builds the DFA, logs the build time and verifies
        // the state count and every individual match.
        public void TestTo100K()
        {
            const int count = 100000;

            var builder = new DfaBuilder <int>();

            for (var i = 0; i < count; ++i)
            {
                builder.AddPattern(Pattern.Match(i.ToString()), i % 7);
            }

            // The timed section covers both building the DFA and counting its states.
            var stopWatch = Stopwatch.StartNew();
            var start     = builder.Build(null);
            var numstates = CountStates(start);

            stopWatch.Stop();
            var telapsed = stopWatch.ElapsedMilliseconds;

            helper.WriteLine($"Minimized {count} numbers -> value mod 7 (down to {numstates} states) in {telapsed * .001} seconds");

            // Neither the empty string nor a number outside the range may match.
            Assert.False(StringMatcher <int> .MatchWholeString(start, "", out _));
            Assert.False(StringMatcher <int> .MatchWholeString(start, "100001", out _));

            for (var i = 0; i < count; ++i)
            {
                Assert.True(StringMatcher <int> .MatchWholeString(start, i.ToString(), out var result));
                Assert.Equal(i % 7, result);
            }

            Assert.Equal(36, numstates);
        }
Exemple #8
0
        // Builds a pattern for digit strings whose digit sum is divisible by 3 and
        // checks that the resulting DFA has exactly 3 states and matches the
        // "By3Test.out.txt" resource via CheckDfa.
        public void Test()
        {
            // Digit classes by remainder mod 3. A remainder-1 or remainder-2 digit
            // may be followed by any number of remainder-0 digits.
            var rem0 = CharRange.AnyOf("0369");
            var rem1 = Pattern.Match(CharRange.AnyOf("147")).ThenMaybeRepeat(rem0);
            var rem2 = Pattern.Match(CharRange.AnyOf("258")).ThenMaybeRepeat(rem0);

            // Sequences that add 2 (mod 3): neutral (1,2) pairs, then 1+1 or a single 2.
            var neutral12 = Pattern.MaybeRepeat(rem1.Then(rem2));
            var plus2     = neutral12.Then(Pattern.AnyOf(rem1.Then(rem1), rem2));

            // Sequences that subtract 2 (mod 3): neutral (2,1) pairs, then 2+2 or a single 1.
            var neutral21 = Pattern.MaybeRepeat(rem2.Then(rem1));
            var minus2    = neutral21.Then(Pattern.AnyOf(rem2.Then(rem2), rem1));

            // Any repetition of pieces that leave the remainder unchanged.
            var by3 = Pattern.MaybeRepeat(Pattern.AnyOf(rem0, rem1.Then(rem2), plus2.Then(minus2)));

            var dfaBuilder = new DfaBuilder <bool>();
            dfaBuilder.AddPattern(by3, true);

            var language = new HashSet <bool> { true };
            var dfaStart = dfaBuilder.Build(language, null);

            Assert.Equal(3, CountStates(dfaStart));
            CheckDfa(dfaStart, "By3Test.out.txt", false);
        }
Exemple #9
0
        // Renders a matchable as text: builds a DFA<bool> that accepts `p` with the
        // result `true` and returns the PrettyPrinter output for it.
        private string PToString(IMatchable p)
        {
            var dfaBuilder = new DfaBuilder <bool>();
            dfaBuilder.AddPattern(p, true);

            var startState = dfaBuilder.Build(null);
            var rendered   = PrettyPrinter.Print(startState);

            return rendered;
        }
Exemple #10
0
        // Verifies BuildReverseFinder() against a reference DFA built by hand from
        // "any string followed by the reversed token pattern", first with the plain
        // token patterns and then with half of them made optional.
        public void Test()
        {
            Array allTokens = Enum.GetValues(typeof(JavaToken));

            // Reference: AllStrings followed by each reversed token pattern.
            var referenceBuilder = new DfaBuilder <bool>();
            foreach (JavaToken token in allTokens)
            {
                var reversed = token.Pattern().Reversed;
                referenceBuilder.AddPattern(Pattern.AllStrings.Then(reversed), true);
            }

            var expected = _toString(referenceBuilder.Build(null));

            // Candidate: the reverse finder built from the plain token patterns.
            var tokenBuilder = new DfaBuilder <JavaToken>();
            foreach (JavaToken token in allTokens)
            {
                tokenBuilder.AddPattern(token.Pattern(), token);
            }

            var actual = _toString(tokenBuilder.BuildReverseFinder());
            Assert.Equal(expected, actual);

            //make sure we properly exclude the empty string from the reverse finder DFA
            tokenBuilder.Clear();
            foreach (JavaToken token in allTokens)
            {
                // Tokens with an odd numeric value get an optional pattern, which
                // can also match the empty string.
                if (((int)token & 1) != 0)
                {
                    tokenBuilder.AddPattern(Pattern.Maybe(token.Pattern()), token);
                }
                else
                {
                    tokenBuilder.AddPattern(token.Pattern(), token);
                }
            }

            actual = _toString(tokenBuilder.BuildReverseFinder());
            Assert.Equal(expected, actual);
        }
Exemple #11
0
        // Registers every JavaToken pattern on the supplied builder, builds the DFA
        // for the full token language and passes it to CheckDfa with the
        // "JavaTest.out.txt" resource name.
        private void Build(DfaBuilder <JavaToken> builder)
        {
            var allTokens = Enum.GetValues(typeof(JavaToken));

            foreach (JavaToken token in allTokens)
            {
                builder.AddPattern(token.Pattern(), token);
            }

            var language = new HashSet <JavaToken>(allTokens.Cast <JavaToken>());

            CheckDfa(builder.Build(language, null), "JavaTest.out.txt", false);
        }
Exemple #12
0
        // Checks that a literal containing a combining accent (U+0301 after 'e') is
        // found by the string searcher and replaced through TokenReplace.
        public void TestSeparateAccentedCharacters()
        {
            const string text     = "Les Mise\u0301rables";
            const string expected = "[Miserable=Les Mise\u0301rables]";

            var dfaBuilder = new DfaBuilder <AccentedChar>();
            dfaBuilder.AddPattern(Pattern.Match("Les Mise\u0301rables"), AccentedChar.Miserable);

            var stringSearcher = dfaBuilder.BuildStringSearcher(null);
            var actual         = stringSearcher.FindAndReplace(text, TokenReplace);

            Assert.Equal(expected, actual);
        }
Exemple #13
0
        // Builds the DFA for all JavaToken patterns over the full token language
        // and passes it to CheckDfa with the "JavaTest.out.txt" resource name.
        public void Test()
        {
            var allTokens  = Enum.GetValues(typeof(JavaToken));
            var dfaBuilder = new DfaBuilder <JavaToken>();

            foreach (JavaToken token in allTokens)
            {
                dfaBuilder.AddPattern(token.Pattern(), token);
            }

            // The language contains every token value.
            var language = new HashSet <JavaToken>(allTokens.Cast <JavaToken>());
            var dfaStart = dfaBuilder.Build(language, null);

            CheckDfa(dfaStart, "JavaTest.out.txt", false);
        }
Exemple #14
0
        // Checks that an input consisting of exactly one token ("this") is found
        // and replaced by the string searcher built from all JavaToken patterns.
        public void TestSingleToken()
        {
            const string input    = "this";
            const string expected = "[THIS=this]";

            var dfaBuilder = new DfaBuilder <JavaToken>();
            foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
            {
                dfaBuilder.AddPattern(token.Pattern(), token);
            }

            var stringSearcher = dfaBuilder.BuildStringSearcher(null);
            var actual         = stringSearcher.FindAndReplace(input, TokenReplace);

            Assert.Equal(expected, actual);
        }
Exemple #15
0
        // Runs the string searcher built from all JavaToken patterns over the
        // "SearcherTestInput.txt" resource and compares the replaced text with the
        // "SearcherTestOutput.txt" resource.
        public void Test()
        {
            var dfaBuilder = new DfaBuilder <JavaToken>();
            foreach (JavaToken token in Enum.GetValues(typeof(JavaToken)))
            {
                dfaBuilder.AddPattern(token.Pattern(), token);
            }

            var stringSearcher = dfaBuilder.BuildStringSearcher(null);

            var input    = ReadResource("SearcherTestInput.txt");
            var expected = ReadResource("SearcherTestOutput.txt");
            var actual   = stringSearcher.FindAndReplace(input, TokenReplace);

            Assert.Equal(expected, actual);
        }
Exemple #16
0
        // Cross-checks DfaAuxiliaryInformation.GetDestinies() against destinies
        // computed the slow way: for every state, collect the set of tokens of all
        // accepting states reachable from it. A state's expected destiny is that
        // token when the set has exactly one element, and null otherwise.
        public void Test()
        {
            var builder = new DfaBuilder <JavaToken?>(null);

            foreach (JavaToken tok in Enum.GetValues(typeof(JavaToken)))
            {
                builder.AddPattern(tok.Pattern(), tok);
            }

            var start   = builder.Build(new HashSet <JavaToken?>(Enum.GetValues(typeof(JavaToken)).Cast <JavaToken?>()), null);
            var auxInfo = new DfaAuxiliaryInformation <JavaToken?>(new[] { start });

            //calculate destinies the slow way
            var states        = auxInfo.GetStatesByNumber();
            var numStates     = states.Count;
            var slowDestinies = new List <ISet <JavaToken?> >(numStates);

            // Seed: each accepting state can reach its own match.
            for (var i = 0; i < numStates; i++)
            {
                var reachable = new HashSet <JavaToken?>();
                var state     = states[i];
                if (state.IsAccepting)
                {
                    reachable.Add(state.Match);
                }

                slowDestinies.Add(reachable);
            }

            // Propagate along transitions until a full pass changes nothing.
            var again = true;

            while (again)
            {
                again = false;
                for (var i = 0; i < numStates; ++i)
                {
                    var set = slowDestinies[i];
                    states[i].EnumerateTransitions((f, l, target) =>
                    {
                        // Another pass is needed whenever ANY token was newly added.
                        // Requiring every token to be new can stop before the sets
                        // have converged, and treats an empty target set as a change.
                        var sizeBefore = set.Count;
                        set.UnionWith(slowDestinies[target.StateNumber]);
                        if (set.Count != sizeBefore)
                        {
                            again = true;
                        }
                    });
                }
            }

            var destinies = auxInfo.GetDestinies();

            for (var i = 0; i < numStates; ++i)
            {
                var       set         = slowDestinies[i];
                JavaToken?wantDestiny = null;
                if (set.Count == 1)
                {
                    wantDestiny = set.Single();
                }

                Assert.Equal(wantDestiny, destinies[i]);
            }
        }
Exemple #17
0
        // Builds two DFAs at once from the same pattern set: numbers in [0, 100000)
        // divisible by 21 get result 3, other multiples of 3 get result 1 and other
        // multiples of 7 get result 2. Language {1, 3} therefore accepts multiples
        // of 3 and language {2, 3} accepts multiples of 7. The test logs the build
        // time and verifies every number against both start states, plus the
        // combined state count.
        public void TestSimultaneousLanguages()
        {
            const int count = 100000;

            var builder = new DfaBuilder <int>();

            for (var i = 0; i < count; ++i)
            {
                if (i % 21 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 3);
                }
                else if (i % 3 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 1);
                }
                else if (i % 7 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 2);
                }
            }

            var langs = new List <ISet <int> >
            {
                new HashSet <int> { 1, 3 }, // multiples of 3
                new HashSet <int> { 2, 3 }  // multiples of 7
            };

            // The timed section covers building both DFAs and counting their states.
            var stopWatch = Stopwatch.StartNew();
            var starts    = builder.Build(langs, null);
            var start3    = starts[0];
            var start7    = starts[1];
            var numstates = CountStates(start3, start7);

            stopWatch.Stop();
            var telapsed = stopWatch.ElapsedMilliseconds;

            // The reported count comes from the loop bound so the message cannot
            // drift from what was actually built.
            helper.WriteLine($"Minimized {count} numbers -> divisible by 7 and 3 (down to {numstates} states) in {telapsed * .001} seconds");

            for (var i = 0; i < count; ++i)
            {
                if (i % 21 == 0)
                {
                    Assert.True(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out var result));
                    Assert.Equal(3, result);
                    Assert.True(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out result));
                    Assert.Equal(3, result);
                }
                else if (i % 3 == 0)
                {
                    Assert.True(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out var result));
                    Assert.Equal(1, result);
                    Assert.False(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out _));
                }
                else if (i % 7 == 0)
                {
                    Assert.False(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out _));
                    Assert.True(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out var result));
                    Assert.Equal(2, result);
                }
            }

            Assert.Equal(137, numstates);
        }