Esempio n. 1
0
        public void EpsilonToSelfIsIgnored()
        {
            // Graph under test: start --'a'--> accepting (value 1), where the
            // accepting state also has an epsilon transition onto itself.
            var nfa       = new NonDeterministicAutomata<int>();
            var start     = nfa.PushEmpty();
            var accepting = nfa.PushValue(1);

            start.BranchTo('a', 'a', accepting);
            accepting.EpsilonTo(accepting);

            var dfa        = start.ToDeterministic();
            var entryIndex = dfa.Start;
            var entryState = dfa.States[entryIndex];

            // Entry state: a single branch covering exactly 'a'..'a', no value.
            Assert.That(entryState.Branches.Count, Is.EqualTo(1));

            var onlyBranch = entryState.Branches[0];

            Assert.That(onlyBranch.Begin, Is.EqualTo('a'));
            Assert.That(onlyBranch.End, Is.EqualTo('a'));
            Assert.That(entryState.HasValue, Is.False);

            // Following 'a' from the entry state must succeed.
            var followed = dfa.TryFollow(entryIndex, 'a', out var targetIndex);

            Assert.That(followed, Is.True);

            // Target state: no outgoing branches and carries the value 1.
            var targetState = dfa.States[targetIndex];

            Assert.That(targetState.Branches.Count, Is.EqualTo(0));
            Assert.That(targetState.HasValue, Is.True);
            Assert.That(targetState.Value, Is.EqualTo(1));
        }
Esempio n. 2
0
        /// <summary>
        /// Build the non-deterministic automata describing glob pattern lexems,
        /// convert it to its deterministic form and store the resulting matcher
        /// in <c>Matcher</c>.
        /// </summary>
        static GlobCompiler()
        {
            var lexer       = new NonDeterministicAutomata<Lexem>();
            var afterEscape = lexer.PushEmpty();

            // Each of these characters, when read from the escape state, leads
            // to an Escape lexem built with that same character.
            foreach (var symbol in "*?[]\\")
            {
                afterEscape.BranchTo(symbol, symbol, lexer.PushValue(new Lexem(LexemType.Escape, symbol)));
            }

            var entry        = lexer.PushEmpty();
            var literalState = lexer.PushValue(new Lexem(LexemType.Literal));

            // Branches from the entry state, listed in ascending character
            // order; -1 is mapped to the End lexem and every range not claimed
            // by a dedicated lexem goes to the shared literal state.
            entry.BranchTo(-1, -1, lexer.PushValue(new Lexem(LexemType.End)));
            entry.BranchTo(char.MinValue, ' ', literalState);
            entry.BranchTo('!', '!', lexer.PushValue(new Lexem(LexemType.Negate)));
            entry.BranchTo('"', ')', literalState);
            entry.BranchTo('*', '*', lexer.PushValue(new Lexem(LexemType.ZeroOrMore)));
            entry.BranchTo('+', ',', literalState);
            entry.BranchTo('-', '-', lexer.PushValue(new Lexem(LexemType.Range)));
            entry.BranchTo('.', '>', literalState);
            entry.BranchTo('?', '?', lexer.PushValue(new Lexem(LexemType.Wildcard)));
            entry.BranchTo('@', 'Z', literalState);
            entry.BranchTo('[', '[', lexer.PushValue(new Lexem(LexemType.ClassBegin)));
            entry.BranchTo('\\', '\\', afterEscape);
            entry.BranchTo(']', ']', lexer.PushValue(new Lexem(LexemType.ClassEnd)));
            entry.BranchTo('^', char.MaxValue, literalState);

            var deterministic = entry.ToDeterministic();

            Matcher = new AutomataMatcher<Lexem>(deterministic);
        }
Esempio n. 3
0
        /// <summary>
        /// Initialize the compiler with the given pattern lexer and a freshly
        /// created automata; the state returned by its first
        /// <c>PushEmpty</c> call is kept as the start state.
        /// </summary>
        /// <param name="patternLexer">Matcher stored as the pattern lexer of this compiler.</param>
        protected PatternCompiler(IMatcher<Lexem> patternLexer)
        {
            var freshAutomata = new NonDeterministicAutomata<TValue>();

            _patternLexer = patternLexer;
            _start        = freshAutomata.PushEmpty();
            _automata     = freshAutomata;
        }
Esempio n. 4
0
        public void ConvertToDeterministicDetectConflict()
        {
            // Two value states (17 and 42) are both reachable from the start
            // state through the very same 'a'..'a' branch.
            var nfa         = new NonDeterministicAutomata<int>();
            var start       = nfa.PushEmpty();
            var seventeen   = nfa.PushValue(17);
            var fortyTwo    = nfa.PushValue(42);

            start.BranchTo('a', 'a', seventeen);
            start.BranchTo('a', 'a', fortyTwo);

            // Conversion is expected to fail with InvalidOperationException.
            Assert.Throws<InvalidOperationException>(() => { start.ToDeterministic(); });
        }
Esempio n. 5
0
        public void EpsilonToValue()
        {
            // Graph under test: start --'a'--> middle --epsilon--> final (value 22).
            var nfa    = new NonDeterministicAutomata<int>();
            var start  = nfa.PushEmpty();
            var middle = nfa.PushEmpty();
            var final  = nfa.PushValue(22);

            start.BranchTo('a', 'a', middle);
            middle.EpsilonTo(final);

            // ConvertAndMatch is defined outside this snippet; it is invoked
            // with input "a", success = true, capture "a" and value 22.
            ConvertAndMatch(start, "a", true, "a", 22);
        }
Esempio n. 6
0
        /// <summary>
        /// Build the non-deterministic automata describing regular expression
        /// lexems, convert it to its deterministic form and store the resulting
        /// matcher in <c>Matcher</c>.
        /// </summary>
        static RegexCompiler()
        {
            var lexer       = new NonDeterministicAutomata<Lexem>();
            var afterEscape = lexer.PushEmpty();

            // Each of these characters, when read from the escape state, leads
            // to an Escape lexem built with that same character.
            foreach (var symbol in "()*+-.?[]\\^{|}")
            {
                afterEscape.BranchTo(symbol, symbol, lexer.PushValue(new Lexem(LexemType.Escape, symbol)));
            }

            // Letter escapes are built with a control character instead.
            afterEscape.BranchTo('n', 'n', lexer.PushValue(new Lexem(LexemType.Escape, '\n')));
            afterEscape.BranchTo('r', 'r', lexer.PushValue(new Lexem(LexemType.Escape, '\r')));
            afterEscape.BranchTo('t', 't', lexer.PushValue(new Lexem(LexemType.Escape, '\t')));

            var entry        = lexer.PushEmpty();
            var literalState = lexer.PushValue(new Lexem(LexemType.Literal));

            // Branches from the entry state, listed in ascending character
            // order; -1 is mapped to the End lexem and every range not claimed
            // by a dedicated lexem goes to the shared literal state.
            entry.BranchTo(-1, -1, lexer.PushValue(new Lexem(LexemType.End)));
            entry.BranchTo(char.MinValue, '\'', literalState);
            entry.BranchTo('(', '(', lexer.PushValue(new Lexem(LexemType.SequenceBegin)));
            entry.BranchTo(')', ')', lexer.PushValue(new Lexem(LexemType.SequenceEnd)));
            entry.BranchTo('*', '*', lexer.PushValue(new Lexem(LexemType.ZeroOrMore)));
            entry.BranchTo('+', '+', lexer.PushValue(new Lexem(LexemType.OneOrMore)));
            entry.BranchTo(',', ',', lexer.PushValue(new Lexem(LexemType.Comma)));
            entry.BranchTo('-', '-', lexer.PushValue(new Lexem(LexemType.Range)));
            entry.BranchTo('.', '.', lexer.PushValue(new Lexem(LexemType.Wildcard)));
            entry.BranchTo('/', '/', literalState);
            entry.BranchTo('0', '9', lexer.PushValue(new Lexem(LexemType.Digit)));
            entry.BranchTo(':', '>', literalState);
            entry.BranchTo('?', '?', lexer.PushValue(new Lexem(LexemType.ZeroOrOne)));
            entry.BranchTo('@', 'Z', literalState);
            entry.BranchTo('[', '[', lexer.PushValue(new Lexem(LexemType.ClassBegin)));
            entry.BranchTo('\\', '\\', afterEscape);
            entry.BranchTo(']', ']', lexer.PushValue(new Lexem(LexemType.ClassEnd)));
            entry.BranchTo('^', '^', lexer.PushValue(new Lexem(LexemType.Negate)));
            entry.BranchTo('_', 'z', literalState);
            entry.BranchTo('{', '{', lexer.PushValue(new Lexem(LexemType.RepeatBegin)));
            entry.BranchTo('|', '|', lexer.PushValue(new Lexem(LexemType.Alternative)));
            entry.BranchTo('}', '}', lexer.PushValue(new Lexem(LexemType.RepeatEnd)));
            entry.BranchTo('~', char.MaxValue, literalState);

            var deterministic = entry.ToDeterministic();

            Matcher = new AutomataMatcher<Lexem>(deterministic);
        }
Esempio n. 7
0
        public void EpsilonTo(string pattern, bool success, string expectedCapture, int expectedValue)
        {
            // Graph under test:
            //   start --epsilon--> middle --'b'--> final (value 17)
            //   start --'a'--> start (self loop)
            var nfa    = new NonDeterministicAutomata<int>();
            var start  = nfa.PushEmpty();
            var middle = nfa.PushEmpty();
            var final  = nfa.PushValue(17);

            start.EpsilonTo(middle);
            start.BranchTo('a', 'a', start);
            middle.BranchTo('b', 'b', final);

            // ConvertAndMatch is defined outside this snippet; the test case
            // arguments are forwarded to it unchanged.
            ConvertAndMatch(start, pattern, success, expectedCapture, expectedValue);
        }
Esempio n. 8
0
        public void ConnectToOverlaps(string pattern, bool success, string expectedCapture, int expectedValue)
        {
            // Graph under test, with three overlapping ranges leaving the start state:
            //   start --'a'..'b'--> start (self loop)
            //   start --'a'..'c'--> left  --'e'--> leftValue  (17)
            //   start --'b'..'d'--> right --'f'--> rightValue (42)
            var nfa        = new NonDeterministicAutomata<int>();
            var start      = nfa.PushEmpty();
            var left       = nfa.PushEmpty();
            var right      = nfa.PushEmpty();
            var leftValue  = nfa.PushValue(17);
            var rightValue = nfa.PushValue(42);

            start.BranchTo('a', 'b', start);
            start.BranchTo('a', 'c', left);
            start.BranchTo('b', 'd', right);
            left.BranchTo('e', 'e', leftValue);
            right.BranchTo('f', 'f', rightValue);

            // ConvertAndMatch is defined outside this snippet; the test case
            // arguments are forwarded to it unchanged.
            ConvertAndMatch(start, pattern, success, expectedCapture, expectedValue);
        }
Esempio n. 9
0
        /// <summary>
        /// Convert this compiled regular expression node into a graph of
        /// non-deterministic states attached to the given parent state, and
        /// return the final state of the produced graph.
        /// </summary>
        /// <param name="automata">Automata from which new states are obtained.</param>
        /// <param name="parent">State the produced graph is connected to.</param>
        /// <returns>Final state of the produced graph.</returns>
        /// <exception cref="InvalidOperationException">
        /// The node type is not Alternative, Character, Repeat or Sequence.
        /// </exception>
        public NonDeterministicNode<TValue> ConnectTo<TValue>(NonDeterministicAutomata<TValue> automata, NonDeterministicNode<TValue> parent)
        {
            switch (Type)
            {
            case NodeType.Alternative:
            {
                //           /-- [child1] --\
                // [parent] ---- [child2] ---> [join]
                //           \-- [child3] --/

                var join = automata.PushEmpty();

                foreach (var option in Children)
                {
                    var optionEnd = option.ConnectTo(automata, parent);

                    optionEnd.EpsilonTo(join);
                }

                return join;
            }

            case NodeType.Character:
            {
                // [parent] --{begin, end}--> [target], one branch per range

                var target = automata.PushEmpty();

                foreach (var interval in Ranges)
                {
                    parent.BranchTo(interval.Begin, interval.End, target);
                }

                return target;
            }

            case NodeType.Repeat:
            {
                //                         /-- [child] --\ * (max - min)
                // [parent] - [child] * min --------------> [exit]
                //                         \-- [child] --/ * infinite

                var tail = parent;

                // Mandatory part: chain the child until lower bound is reached
                for (var done = 0; done < RepeatMin; ++done)
                {
                    tail = Children[0].ConnectTo(automata, tail);
                }

                var exit = automata.PushEmpty();

                tail.EpsilonTo(exit);

                if (RepeatMax < 0)
                {
                    // Unbounded repeat: one more child looping back onto the
                    // tail, which can also leave to the exit state
                    var loopEnd = Children[0].ConnectTo(automata, tail);

                    loopEnd.EpsilonTo(tail);
                    loopEnd.EpsilonTo(exit);
                }
                else
                {
                    // Bounded repeat: (max - min) optional children, each one
                    // allowed to leave to the exit state
                    for (var extra = 0; extra < RepeatMax - RepeatMin; ++extra)
                    {
                        tail = Children[0].ConnectTo(automata, tail);
                        tail.EpsilonTo(exit);
                    }
                }

                return exit;
            }

            case NodeType.Sequence:
            {
                // [parent] -> [child1] -> [child2] -> ... -> [cursor]

                var cursor = parent;

                foreach (var element in Children)
                {
                    cursor = element.ConnectTo(automata, cursor);
                }

                return cursor;
            }

            default:
                throw new InvalidOperationException();
            }
        }
Esempio n. 10
0
 /// <summary>
 /// Create a node bound to the given automata and index.
 /// </summary>
 /// <param name="automata">Automata stored as the owner reference of this node.</param>
 /// <param name="index">Index stored for this node.</param>
 public NonDeterministicNode(NonDeterministicAutomata<TValue> automata, int index)
 {
     _index    = index;
     _automata = automata;
 }