public void TestStringMatcher()
        {
            // Two patterns sharing the same prefix: result 1 ends in 'b', result 2 ends in 'c'.
            var dfaBuilder = new DfaBuilder<int>();
            dfaBuilder.AddPattern(Pattern.Regex("a[ab]*b"), 1);
            dfaBuilder.AddPattern(Pattern.Regex("a[ab]*c"), 2);
            DfaState<int> startState = dfaBuilder.Build(null);

            var matcher = new StringMatcher<int>("bbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbcaaaaaaabbbaaaaaaa");

            // First search: expects the run that ends with 'c'.
            Assert.True(matcher.FindNext(startState, out var token));
            Assert.Equal(2, token);
            Assert.Equal("aaaaaaaaaaaaaaaaaaaaaaaabbbbc", matcher.LastMatch);
            Assert.Equal(5, matcher.LastMatchStart);
            Assert.Equal(34, matcher.LastMatchEnd);

            // Second search: expects the run that ends with 'b'.
            Assert.True(matcher.FindNext(startState, out token));
            Assert.Equal(1, token);
            Assert.Equal("aaaaaaabbb", matcher.LastMatch);

            // Third search: nothing left, and the result is expected to be 0.
            Assert.False(matcher.FindNext(startState, out token));
            Assert.Equal(0, token);

            // Reposition the matcher explicitly and search again from there.
            matcher.SetPositions(15, 20, 33);
            Assert.Equal("aaaaa", matcher.LastMatch);

            matcher.FindNext(startState, out _);
            Assert.Equal("aaaaaaaaabbbb", matcher.LastMatch);

            Assert.False(matcher.FindNext(startState, out _));
        }
Exemple #2
0
        /// <summary>
        /// Builds a DFA that maps the decimal strings "0".."99999" to their value mod 7,
        /// logs how long the build took, and checks both the match results and the
        /// resulting state count.
        /// </summary>
        public void TestTo100K()
        {
            var builder = new DfaBuilder <int>();

            for (var i = 0; i < 100000; ++i)
            {
                builder.AddPattern(Pattern.Match(i.ToString()), i % 7);
            }

            // The timed region covers both the build and the state count.
            var stopWatch = Stopwatch.StartNew();
            var start     = builder.Build(null);
            var numstates = CountStates(start);

            stopWatch.Stop();
            var telapsed = stopWatch.ElapsedMilliseconds;

            helper.WriteLine($"Minimized 100000 numbers -> value mod 7 (down to {numstates} states) in {telapsed * .001} seconds");

            // Strings outside the added set must not match.
            Assert.False(StringMatcher <int> .MatchWholeString(start, "", out _));
            Assert.False(StringMatcher <int> .MatchWholeString(start, "100001", out _));

            // Every added number must match and produce its value mod 7.
            for (var i = 0; i < 100000; ++i)
            {
                Assert.True(StringMatcher <int> .MatchWholeString(start, i.ToString(), out var result));
                Assert.Equal(i % 7, result);
            }

            Assert.Equal(36, numstates);
        }
Exemple #3
0
        /// <summary>
        /// Builds a DFA with a single regex pattern (taken from <c>Pattern</c>) whose
        /// accepting result is <c>true</c>, and returns what the builder produces.
        /// </summary>
        private static DfaState<bool> CreateMatcher()
        {
            var dfaBuilder = new DfaBuilder<bool>();
            var regex      = DfaLex.Pattern.Regex(Pattern);

            dfaBuilder.AddPattern(regex, true);

            var startState = dfaBuilder.Build(null);
            return startState;
        }
Exemple #4
0
        /// <summary>
        /// Repeatedly searches <paramref name="src"/> for <paramref name="patString"/> and
        /// counts how many searches start inside the measurement window.
        /// </summary>
        /// <param name="src">Text to search; it is expected NOT to contain a match.</param>
        /// <param name="patString">Regex source compiled into the DFA used for searching.</param>
        /// <returns>
        /// Number of iterations that began at or after <c>SpinUp</c> milliseconds and before
        /// <c>SpinUp + 1000</c> milliseconds on the stopwatch.
        /// </returns>
        /// <exception cref="InvalidOperationException">A match was unexpectedly found.</exception>
        private int TimeMatcher(string src, string patString)
        {
            DfaState <bool> startState;
            {
                var builder = new DfaBuilder <bool>();
                builder.AddPattern(Pattern.Regex(patString), true);
                startState = builder.Build(null);
            }

            var count     = 0;
            var stopWatch = new Stopwatch();

            stopWatch.Start();
            for (var t = stopWatch.ElapsedMilliseconds; t < SpinUp + 1000; t = stopWatch.ElapsedMilliseconds)
            {
                // A fresh matcher is created on every iteration, so its construction is part of the timed work.
                var m = new StringMatcher <bool>(src);
                if (m.FindNext(startState, out _))
                {
                    // A specific exception type instead of bare System.Exception (CA2201);
                    // still caught by any existing catch (Exception) handler.
                    throw new InvalidOperationException("not supposed to find a match");
                }

                // Iterations during the spin-up period are executed but not counted.
                if (t >= SpinUp)
                {
                    ++count;
                }
            }

            return(count);
        }
Exemple #5
0
        public void Test()
        {
            // Pattern for whole numbers divisible by 3, assembled from digit classes mod 3.
            var zeroDigits = CharRange.AnyOf("0369");
            var oneDigits  = Pattern.Match(CharRange.AnyOf("147")).ThenMaybeRepeat(zeroDigits);
            var twoDigits  = Pattern.Match(CharRange.AnyOf("258")).ThenMaybeRepeat(zeroDigits);

            var plusTwo = Pattern
                          .MaybeRepeat(oneDigits.Then(twoDigits))
                          .Then(Pattern.AnyOf(oneDigits.Then(oneDigits), twoDigits));

            var minusTwo = Pattern
                           .MaybeRepeat(twoDigits.Then(oneDigits))
                           .Then(Pattern.AnyOf(twoDigits.Then(twoDigits), oneDigits));

            var divisibleBy3 = Pattern.MaybeRepeat(
                Pattern.AnyOf(zeroDigits, oneDigits.Then(twoDigits), plusTwo.Then(minusTwo)));

            var dfaBuilder = new DfaBuilder<bool>();
            dfaBuilder.AddPattern(divisibleBy3, true);

            // Build for the language consisting of the single result 'true'.
            var language   = new HashSet<bool> { true };
            var startState = dfaBuilder.Build(language, null);

            Assert.Equal(3, CountStates(startState));
            CheckDfa(startState, "By3Test.out.txt", false);
        }
Exemple #6
0
        /// <summary>
        /// Computes the regular expression via <c>ComputeRx</c>, builds an NFA from it and
        /// converts that NFA into a DFA using this instance's <c>eof</c> value.
        /// </summary>
        /// <returns>The DFA produced by <c>DfaBuilder&lt;TLetter&gt;.Build</c>.</returns>
        public Dfa<TLetter> ComputeDfa()
        {
            var nfa = NfaBuilder<TLetter>.Build(this.ComputeRx());
            var dfa = DfaBuilder<TLetter>.Build(nfa, this.eof, true);

            return dfa;
        }
Exemple #7
0
        /// <summary>
        /// Builds a DFA that accepts <paramref name="p"/> with result <c>true</c> and
        /// returns its <c>PrettyPrinter</c> rendering.
        /// </summary>
        private string PToString(IMatchable p)
        {
            var dfaBuilder = new DfaBuilder<bool>();
            dfaBuilder.AddPattern(p, true);

            var startState = dfaBuilder.Build(null);
            var printed    = PrettyPrinter.Print(startState);

            return printed;
        }
Exemple #8
0
        /// <summary>
        /// Computes the regular expression via <c>ComputeRx</c>, wraps it in an
        /// <c>AlphabetBuilder</c>, and builds a DFA over <c>LetterId</c> from the
        /// alphabet's expression.
        /// </summary>
        /// <param name="alphabet">Receives the alphabet builder created for the expression.</param>
        /// <param name="validRanges">Passed through to the <c>AlphabetBuilder</c> constructor.</param>
        /// <returns>The DFA produced by <c>DfaBuilder&lt;LetterId&gt;.Build</c>.</returns>
        public Dfa<LetterId> ComputeDfa(out AlphabetBuilder<TLetter> alphabet, RangeSet<TLetter> validRanges = default)
        {
            alphabet = new AlphabetBuilder<TLetter>(this.ComputeRx(), this.eof, validRanges);

            var nfa = NfaBuilder<LetterId>.Build(alphabet.Expression);
            var dfa = DfaBuilder<LetterId>.Build(nfa, LetterId.Eof, true);

            return dfa;
        }
Exemple #9
0
        public void DfaWithEnumResultsTest()
        {
            var dfaBuilder = new DfaBuilder<EnumToken>();

            // "if" also matches the identifier regex; the callback passed to Build
            // picks the first of the accepted results.
            var keyword    = Pattern.Match("if");
            dfaBuilder.AddPattern(keyword, EnumToken.If);
            var identifier = Pattern.Regex("([A-Za-z])([A-Za-z0-9])*");
            dfaBuilder.AddPattern(identifier, EnumToken.Id);

            var startState = dfaBuilder.Build(accepts => accepts.First());

            CheckDfa(startState, "StructTests-1.txt", false);
        }
Exemple #10
0
        public void DfaWithIntResultsTest()
        {
            var dfaBuilder = new DfaBuilder<int>();

            // Two patterns with integer results 0 and 1.
            var first  = Pattern.Regex("ab");
            dfaBuilder.AddPattern(first, 0);
            var second = Pattern.Regex("bb");
            dfaBuilder.AddPattern(second, 1);

            var startState = dfaBuilder.Build(null);

            CheckDfa(startState, "StructTests-2.txt", false);
        }
        /// <summary>
        /// Initializes the two members this grammar relies on: <c>stateMachine</c>, a compiled
        /// DFA expression that recognizes the charset-expression tokens, and <c>table</c>, the
        /// table computed by <c>LalrTableGenerator</c> for the production list below.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The generated DFA's start state id is not the default id.
        /// </exception>
        private CharsetGrammar()
        {
            var provider       = new UnicodeCharSetProvider();
            var mapper         = new UnicodeUtf16Mapper(false, false);
            var rx             = RegexLexer.CreateRx(mapper);
            // Whitespace and the two charset token forms are extracted from the regex lexer's own expression.
            // NOTE(review): the trailing 0 passed to every RxAccept is presumably a precedence/priority — confirm.
            var rxWhitespace   = new RxAccept <char>(RxOfSymbol <char> .Extract(rx, RegexLexer.SymWhitespace), SymWhitespace, 0);
            var rxCharset      = new RxAccept <char>(RxOfSymbol <char> .Extract(rx, RegexLexer.SymCharset), SymCharset, 0);
            var rxRegexCharset = new RxAccept <char>(RxOfSymbol <char> .Extract(rx, RegexLexer.SymRegexCharset), SymRegexCharset, 0);
            // Operator and parenthesis tokens are single characters; union accepts either '|' or '+'.
            var rxUnion        = new RxAccept <char>(RegexMatchSet.FromChars('|', '+').ToInvariant(mapper, provider, true), SymUnion, 0);
            var rxSubtract     = new RxAccept <char>(RegexMatchSet.FromChars('-').ToInvariant(mapper, provider, true), SymSubtract, 0);
            var rxIntersect    = new RxAccept <char>(RegexMatchSet.FromChars('&').ToInvariant(mapper, provider, true), SymIntersect, 0);
            var rxDifference   = new RxAccept <char>(RegexMatchSet.FromChars('^').ToInvariant(mapper, provider, true), SymDifference, 0);
            var rxNegate       = new RxAccept <char>(RegexMatchSet.FromChars('~').ToInvariant(mapper, provider, true), SymNegate, 0);
            var rxParensOpen   = new RxAccept <char>(RegexMatchSet.FromChars('(').ToInvariant(mapper, provider, true), SymParensOpen, 0);
            var rxParensClose  = new RxAccept <char>(RegexMatchSet.FromChars(')').ToInvariant(mapper, provider, true), SymParensClose, 0);
            // Combine all ten token expressions into one right-nested chain of alternations.
            var alpha          = new AlphabetBuilder <char>(
                new RxAlternation <char>(rxWhitespace,
                                         new RxAlternation <char>(rxCharset,
                                                                  new RxAlternation <char>(rxRegexCharset,
                                                                                           new RxAlternation <char>(rxUnion,
                                                                                                                    new RxAlternation <char>(rxSubtract,
                                                                                                                                             new RxAlternation <char>(rxIntersect,
                                                                                                                                                                      new RxAlternation <char>(rxDifference,
                                                                                                                                                                                               new RxAlternation <char>(rxNegate,
                                                                                                                                                                                                                        new RxAlternation <char>(rxParensOpen, rxParensClose))))))))),
                Utf16Chars.EOF,
                Utf16Chars.ValidBmp);
            // The NFA and DFA are built over LetterId (via the alphabet's expression), not over raw chars.
            var nfa = NfaBuilder <LetterId> .Build(alpha.Expression);

            var dfa = DfaBuilder <LetterId> .Build(nfa, LetterId.Eof);

            // Fail fast if the start state does not have the default id.
            // NOTE(review): presumably consumers of the state machine start from the default id — confirm.
            if (dfa.StartState.Id != default(Id <DfaState <LetterId> >))
            {
                throw new InvalidOperationException($"Internal error: Unexpected DFA start state {dfa.StartState.Id}");
            }
            this.stateMachine = DfaStateMachineEmitter.CreateExpression(dfa, AlphabetMapperEmitter <char> .CreateExpression(alpha)).Compile();
            // NOTE(review): each initializer row looks like a production with the produced symbol first
            // and its right-hand side after it; the meaning of the -2 and -1 arguments is not visible here — confirm.
            this.table        = new LalrTableGenerator(new GrammarBuilder(-2, -1, SymExpression)
            {
                { SymUnionExpression, SymExpression, SymUnion, SymNegateExpression },
                { SymExpression, SymUnionExpression },
                { SymSubtractExpression, SymExpression, SymSubtract, SymNegateExpression },
                { SymExpression, SymSubtractExpression },
                { SymIntersectExpression, SymExpression, SymIntersect, SymNegateExpression },
                { SymExpression, SymIntersectExpression },
                { SymDifferenceExpression, SymExpression, SymDifference, SymNegateExpression },
                { SymExpression, SymDifferenceExpression },
                { SymExpression, SymNegateExpression },
                { SymNegateExpression, SymNegate, SymValueExpression },
                { SymNegateExpression, SymValueExpression },
                { SymValueExpression, SymParensOpen, SymExpression, SymParensClose },
                { SymValueExpression, SymCharset },
                { SymValueExpression, SymRegexCharset }
            })
                                .ComputeTable();
        }
Exemple #12
0
        public void Test()
        {
            // Keyword "if" versus a general identifier; both results belong to the language built below.
            var dfaBuilder = new DfaBuilder<string>();
            var keyword    = Pattern.Regex("if");
            var identifier = Pattern.Regex("[a-zA-Z][a-zA-Z0-9]*");

            dfaBuilder.AddPattern(keyword, "if");
            dfaBuilder.AddPattern(identifier, "id");

            var language   = new HashSet<string> { "if", "id" };
            var startState = dfaBuilder.Build(language, accepts => accepts.First());

            PrintDot(startState);
        }
Exemple #13
0
        /// <summary>
        /// Adds the pattern of every <c>JavaToken</c> value to <paramref name="builder"/>,
        /// builds the DFA for the full token set, and checks it against "JavaTest.out.txt".
        /// </summary>
        private void Build(DfaBuilder<JavaToken> builder)
        {
            var allTokens = Enum.GetValues(typeof(JavaToken)).Cast<JavaToken>().ToList();

            foreach (var token in allTokens)
            {
                builder.AddPattern(token.Pattern(), token);
            }

            var language   = new HashSet<JavaToken>(allTokens);
            var startState = builder.Build(language, null);

            CheckDfa(startState, "JavaTest.out.txt", false);
        }
Exemple #14
0
        public void Test()
        {
            // One pattern per JavaToken value; the language is the complete token set.
            var allTokens  = Enum.GetValues(typeof(JavaToken)).Cast<JavaToken>().ToList();
            var dfaBuilder = new DfaBuilder<JavaToken>();

            foreach (var token in allTokens)
            {
                dfaBuilder.AddPattern(token.Pattern(), token);
            }

            var language   = new HashSet<JavaToken>(allTokens);
            var startState = dfaBuilder.Build(language, null);

            CheckDfa(startState, "JavaTest.out.txt", false);
        }
Exemple #15
0
        public void Test()
        {
            Array allTokens = Enum.GetValues(typeof(JavaToken));

            // Reference DFA: any string followed by a reversed token pattern.
            var referenceBuilder = new DfaBuilder<bool>();
            foreach (JavaToken token in allTokens)
            {
                var reversed = token.Pattern().Reversed;
                referenceBuilder.AddPattern(Pattern.AllStrings.Then(reversed), true);
            }

            var expected = _toString(referenceBuilder.Build(null));

            // DFA under test: the reverse finder built from the plain token patterns.
            var tokenBuilder = new DfaBuilder<JavaToken>();
            foreach (JavaToken token in allTokens)
            {
                tokenBuilder.AddPattern(token.Pattern(), token);
            }

            var actual = _toString(tokenBuilder.BuildReverseFinder());
            Assert.Equal(expected, actual);

            // Make sure the empty string is properly excluded from the reverse finder DFA:
            // tokens with an odd numeric value are made optional this time.
            tokenBuilder.Clear();
            foreach (JavaToken token in allTokens)
            {
                var isOdd = ((int)token & 1) != 0;
                if (isOdd)
                {
                    tokenBuilder.AddPattern(Pattern.Maybe(token.Pattern()), token);
                }
                else
                {
                    tokenBuilder.AddPattern(token.Pattern(), token);
                }
            }

            actual = _toString(tokenBuilder.BuildReverseFinder());
            Assert.Equal(expected, actual);
        }
Exemple #16
0
        /// <summary>
        /// Builds two DFAs in one call from the same pattern set: one for the results {1, 3}
        /// (multiples of 3) and one for {2, 3} (multiples of 7), where multiples of 21 carry
        /// result 3 and so belong to both. Logs the build time, then verifies the match
        /// results and the combined state count.
        /// </summary>
        public void TestSimultaneousLanguages()
        {
            var builder = new DfaBuilder <int>();

            for (var i = 0; i < 100000; ++i)
            {
                if (i % 21 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 3);
                }
                else if (i % 3 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 1);
                }
                else if (i % 7 == 0)
                {
                    builder.AddPattern(Pattern.Match(i.ToString()), 2);
                }
            }

            // First language: multiples of 3; second language: multiples of 7.
            var langs = new List <ISet <int> >
            {
                new HashSet <int> { 1, 3 },
                new HashSet <int> { 2, 3 }
            };

            // The timed region covers both the build and the state count.
            var stopWatch = Stopwatch.StartNew();
            var starts    = builder.Build(langs, null);
            var start3    = starts[0];
            var start7    = starts[1];
            var numstates = CountStates(start3, start7);

            stopWatch.Stop();
            var telapsed = stopWatch.ElapsedMilliseconds;

            // The range examined is 0..99999, so the message reports 100000 numbers.
            helper.WriteLine($"Minimized 100000 numbers -> divisible by 7 and 3 (down to {numstates} states) in {telapsed * .001} seconds");
            for (var i = 0; i < 100000; ++i)
            {
                if (i % 21 == 0)
                {
                    Assert.True(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out var result));
                    Assert.Equal(3, result);
                    Assert.True(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out result));
                    Assert.Equal(3, result);
                }
                else if (i % 3 == 0)
                {
                    Assert.True(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out var result));
                    Assert.Equal(1, result);
                    Assert.False(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out _));
                }
                else if (i % 7 == 0)
                {
                    Assert.False(StringMatcher <int> .MatchWholeString(start3, i.ToString(), out _));
                    Assert.True(StringMatcher <int> .MatchWholeString(start7, i.ToString(), out var result));
                    Assert.Equal(2, result);
                }
            }

            Assert.Equal(137, numstates);
        }
Exemple #17
0
        /// <summary>
        /// Checks <c>DfaAuxiliaryInformation.GetDestinies</c> against a slow reference:
        /// for each state, the set of tokens found on that state or on any state reachable
        /// from it is computed by fixpoint iteration. The expected destiny is that token
        /// when the set has exactly one element, and <c>null</c> otherwise.
        /// </summary>
        public void Test()
        {
            var builder = new DfaBuilder <JavaToken?>(null);

            foreach (JavaToken tok in Enum.GetValues(typeof(JavaToken)))
            {
                builder.AddPattern(tok.Pattern(), tok);
            }

            var start   = builder.Build(new HashSet <JavaToken?>(Enum.GetValues(typeof(JavaToken)).Cast <JavaToken?>()), null);
            var auxInfo = new DfaAuxiliaryInformation <JavaToken?>(new[] { start });

            //calculate destinies the slow way
            var states        = auxInfo.GetStatesByNumber();
            var slowDestinies = new List <ISet <JavaToken?> >(states.Count);
            var numStates     = states.Count;

            // Seed: an accepting state starts with its own match; other states start empty.
            for (var i = 0; i < numStates; i++)
            {
                slowDestinies.Add(new HashSet <JavaToken?>());
                var state = states[i];
                if (state.IsAccepting)
                {
                    slowDestinies[i].Add(state.Match);
                }
            }

            // Propagate target sets into their source states until no set changes.
            var again = true;

            while (again)
            {
                again = false;
                for (var i = 0; i < numStates; ++i)
                {
                    var set   = slowDestinies[i];
                    var state = states[i];
                    state.EnumerateTransitions((f, l, target) =>
                    {
                        // Another pass is needed exactly when this union added something.
                        // Requiring every token to be new would skip partial additions and
                        // would never settle on an empty target set.
                        var before = set.Count;
                        set.UnionWith(slowDestinies[target.StateNumber]);
                        if (set.Count != before)
                        {
                            again = true;
                        }
                    });
                }
            }

            var destinies = auxInfo.GetDestinies();

            for (var i = 0; i < numStates; ++i)
            {
                var       set         = slowDestinies[i];
                JavaToken?wantDestiny = null;
                if (set.Count == 1)
                {
                    wantDestiny = set.FirstOrDefault();
                }

                // State i must have the destiny computed by the slow reference.
                Assert.Equal(wantDestiny, destinies[i]);
            }
        }