public void EpsilonToSelfIsIgnored()
{
    // Graph under test: [start] --'a'--> [accepting: 1], where the accepting
    // state also carries an epsilon transition back onto itself.
    var nfa = new NonDeterministicAutomata<int>();
    var start = nfa.PushEmpty();
    var accepting = nfa.PushValue(1);

    start.BranchTo('a', 'a', accepting);
    accepting.EpsilonTo(accepting);

    var dfa = start.ToDeterministic();

    // Initial state: exactly one branch on 'a', and no value attached.
    var initialIndex = dfa.Start;
    var initialState = dfa.States[initialIndex];

    Assert.That(initialState.Branches.Count, Is.EqualTo(1));
    Assert.That(initialState.Branches[0].Begin, Is.EqualTo('a'));
    Assert.That(initialState.Branches[0].End, Is.EqualTo('a'));
    Assert.That(initialState.HasValue, Is.False);

    // Following 'a' must succeed and land on a branch-less state holding 1.
    var followed = dfa.TryFollow(initialIndex, 'a', out var finalIndex);

    Assert.That(followed, Is.True);

    var finalState = dfa.States[finalIndex];

    Assert.That(finalState.Branches.Count, Is.EqualTo(0));
    Assert.That(finalState.HasValue, Is.True);
    Assert.That(finalState.Value, Is.EqualTo(1));
}
/// <summary>
/// Build the lexer automata for glob patterns and store its deterministic
/// form, wrapped in an <c>AutomataMatcher</c>, into <c>Matcher</c>.
/// </summary>
static GlobCompiler()
{
    var automata = new NonDeterministicAutomata<Lexem>();

    // State reached after a backslash: each escapable character produces an
    // Escape lexem carrying that same character.
    var escape = automata.PushEmpty();

    foreach (var symbol in "*?[]\\")
    {
        escape.BranchTo(symbol, symbol, automata.PushValue(new Lexem(LexemType.Escape, symbol)));
    }

    var character = automata.PushEmpty();
    var literal = automata.PushValue(new Lexem(LexemType.Literal));

    // Register a single-character token leading to a fresh state of given type.
    void Token(char symbol, LexemType type)
    {
        character.BranchTo(symbol, symbol, automata.PushValue(new Lexem(type)));
    }

    // Entry state: -1 yields an End lexem (presumably the end-of-input marker),
    // special characters yield their own lexem, a backslash enters the escape
    // state, and every remaining character range is a literal.
    character.BranchTo(-1, -1, automata.PushValue(new Lexem(LexemType.End)));
    character.BranchTo(char.MinValue, ' ', literal);
    Token('!', LexemType.Negate);
    character.BranchTo('"', ')', literal);
    Token('*', LexemType.ZeroOrMore);
    character.BranchTo('+', ',', literal);
    Token('-', LexemType.Range);
    character.BranchTo('.', '>', literal);
    Token('?', LexemType.Wildcard);
    character.BranchTo('@', 'Z', literal);
    Token('[', LexemType.ClassBegin);
    character.BranchTo('\\', '\\', escape);
    Token(']', LexemType.ClassEnd);
    character.BranchTo('^', char.MaxValue, literal);

    Matcher = new AutomataMatcher<Lexem>(character.ToDeterministic());
}
/// <summary>
/// Initialize the compiler with a fresh non-deterministic automata, an empty
/// start state pushed onto it, and the lexer used to read patterns.
/// </summary>
/// <param name="patternLexer">Matcher producing pattern lexems.</param>
protected PatternCompiler(IMatcher<Lexem> patternLexer)
{
    _patternLexer = patternLexer;

    var graph = new NonDeterministicAutomata<TValue>();

    _automata = graph;
    _start = graph.PushEmpty();
}
public void ConvertToDeterministicDetectConflict()
{
    // Two states holding different values are both reachable from the start
    // state through the very same 'a' branch.
    var nfa = new NonDeterministicAutomata<int>();
    var start = nfa.PushEmpty();
    var first = nfa.PushValue(17);
    var second = nfa.PushValue(42);

    start.BranchTo('a', 'a', first);
    start.BranchTo('a', 'a', second);

    // Conversion is expected to reject this graph.
    Assert.Throws<InvalidOperationException>(() =>
    {
        start.ToDeterministic();
    });
}
public void EpsilonToValue()
{
    // [start] --'a'--> [middle] --epsilon--> [valued: 22]
    var nfa = new NonDeterministicAutomata<int>();
    var start = nfa.PushEmpty();
    var middle = nfa.PushEmpty();
    var valued = nfa.PushValue(22);

    start.BranchTo('a', 'a', middle);
    middle.EpsilonTo(valued);

    // Matching "a" is expected to succeed, capture "a" and yield 22.
    ConvertAndMatch(start, "a", true, "a", 22);
}
/// <summary>
/// Build the lexer automata for regular expression patterns and store its
/// deterministic form, wrapped in an <c>AutomataMatcher</c>, into
/// <c>Matcher</c>.
/// </summary>
static RegexCompiler()
{
    var automata = new NonDeterministicAutomata<Lexem>();

    // State reached after a backslash. Special characters escape to
    // themselves; 'n', 'r' and 't' escape to the matching control character.
    var escape = automata.PushEmpty();

    foreach (var symbol in "()*+-.?[]\\^{|}")
    {
        escape.BranchTo(symbol, symbol, automata.PushValue(new Lexem(LexemType.Escape, symbol)));
    }

    escape.BranchTo('n', 'n', automata.PushValue(new Lexem(LexemType.Escape, '\n')));
    escape.BranchTo('r', 'r', automata.PushValue(new Lexem(LexemType.Escape, '\r')));
    escape.BranchTo('t', 't', automata.PushValue(new Lexem(LexemType.Escape, '\t')));

    var character = automata.PushEmpty();
    var literal = automata.PushValue(new Lexem(LexemType.Literal));

    // Register a single-character token leading to a fresh state of given type.
    void Token(char symbol, LexemType type)
    {
        character.BranchTo(symbol, symbol, automata.PushValue(new Lexem(type)));
    }

    // Entry state: -1 yields an End lexem (presumably the end-of-input marker),
    // special characters yield their own lexem, digits share one Digit state,
    // a backslash enters the escape state, and every remaining character range
    // is a literal.
    character.BranchTo(-1, -1, automata.PushValue(new Lexem(LexemType.End)));
    character.BranchTo(char.MinValue, '\'', literal);
    Token('(', LexemType.SequenceBegin);
    Token(')', LexemType.SequenceEnd);
    Token('*', LexemType.ZeroOrMore);
    Token('+', LexemType.OneOrMore);
    Token(',', LexemType.Comma);
    Token('-', LexemType.Range);
    Token('.', LexemType.Wildcard);
    character.BranchTo('/', '/', literal);
    character.BranchTo('0', '9', automata.PushValue(new Lexem(LexemType.Digit)));
    character.BranchTo(':', '>', literal);
    Token('?', LexemType.ZeroOrOne);
    character.BranchTo('@', 'Z', literal);
    Token('[', LexemType.ClassBegin);
    character.BranchTo('\\', '\\', escape);
    Token(']', LexemType.ClassEnd);
    Token('^', LexemType.Negate);
    character.BranchTo('_', 'z', literal);
    Token('{', LexemType.RepeatBegin);
    Token('|', LexemType.Alternative);
    Token('}', LexemType.RepeatEnd);
    character.BranchTo('~', char.MaxValue, literal);

    Matcher = new AutomataMatcher<Lexem>(character.ToDeterministic());
}
public void EpsilonTo(string pattern, bool success, string expectedCapture, int expectedValue)
{
    // [start] loops over itself on 'a' and reaches [gate] through an epsilon
    // transition; [gate] --'b'--> [valued: 17].
    var nfa = new NonDeterministicAutomata<int>();
    var start = nfa.PushEmpty();
    var gate = nfa.PushEmpty();
    var valued = nfa.PushValue(17);

    start.EpsilonTo(gate);
    start.BranchTo('a', 'a', start);
    gate.BranchTo('b', 'b', valued);

    ConvertAndMatch(start, pattern, success, expectedCapture, expectedValue);
}
public void ConnectToOverlaps(string pattern, bool success, string expectedCapture, int expectedValue)
{
    var nfa = new NonDeterministicAutomata<int>();
    var start = nfa.PushEmpty();
    var left = nfa.PushEmpty();
    var right = nfa.PushEmpty();
    var leftValue = nfa.PushValue(17);
    var rightValue = nfa.PushValue(42);

    // Three branches leaving the start state over overlapping ranges:
    // [a-b] loops back, [a-c] goes left, [b-d] goes right.
    start.BranchTo('a', 'b', start);
    start.BranchTo('a', 'c', left);
    start.BranchTo('b', 'd', right);

    // Each side is terminated by its own character and its own value.
    left.BranchTo('e', 'e', leftValue);
    right.BranchTo('f', 'f', rightValue);

    ConvertAndMatch(start, pattern, success, expectedCapture, expectedValue);
}
/// <summary>
/// Expand this compiled regular expression node into a graph of
/// non-deterministic states attached to the given parent state, and return
/// the final state of the produced graph.
/// </summary>
/// <param name="automata">Automata used to allocate new states.</param>
/// <param name="parent">State the produced graph starts from.</param>
/// <returns>State reached once this node has been entirely traversed.</returns>
/// <exception cref="InvalidOperationException">Node type is not one of the handled types.</exception>
public NonDeterministicNode<TValue> ConnectTo<TValue>(NonDeterministicAutomata<TValue> automata, NonDeterministicNode<TValue> parent)
{
    switch (Type)
    {
        case NodeType.Alternative:
        {
            //           /-- [child1] --\
            // [parent] ---- [child2] ---> [join]
            //           \-- [child3] --/
            var join = automata.PushEmpty();

            foreach (var option in Children)
            {
                var optionEnd = option.ConnectTo(automata, parent);

                optionEnd.EpsilonTo(join);
            }

            return join;
        }

        case NodeType.Character:
        {
            // [parent] --{begin, end}--> [target]
            var target = automata.PushEmpty();

            foreach (var range in Ranges)
            {
                parent.BranchTo(range.Begin, range.End, target);
            }

            return target;
        }

        case NodeType.Repeat:
        {
            //                           /-- [child] --\ * (max - min)
            // [parent] - [child] * min ----------------> [exit]
            //                           \-- [child] --/ * infinite
            var cursor = parent;

            // Mandatory part: chain the child until the lower bound is reached
            for (var done = 0; done < RepeatMin; ++done)
            {
                cursor = Children[0].ConnectTo(automata, cursor);
            }

            var exit = automata.PushEmpty();

            cursor.EpsilonTo(exit);

            // Unbounded repeat: loop one more child copy back over its own start
            if (RepeatMax < 0)
            {
                var loopEnd = Children[0].ConnectTo(automata, cursor);

                loopEnd.EpsilonTo(cursor);
                loopEnd.EpsilonTo(exit);

                return exit;
            }

            // Bounded repeat: (max - min) extra copies, each one allowed to exit
            var optionalCount = RepeatMax - RepeatMin;

            for (var extra = 0; extra < optionalCount; ++extra)
            {
                cursor = Children[0].ConnectTo(automata, cursor);
                cursor.EpsilonTo(exit);
            }

            return exit;
        }

        case NodeType.Sequence:
        {
            // [parent] -> [child1] -> [child2] -> ... -> [tail]
            var tail = parent;

            foreach (var step in Children)
            {
                tail = step.ConnectTo(automata, tail);
            }

            return tail;
        }

        default:
            throw new InvalidOperationException();
    }
}
/// <summary>
/// Create a node bound to the given automata and index.
/// </summary>
/// <param name="automata">Automata stored by this node.</param>
/// <param name="index">Index stored by this node.</param>
public NonDeterministicNode(NonDeterministicAutomata<TValue> automata, int index)
{
    _index = index;
    _automata = automata;
}