/// <summary>
/// Compiles the body of a look-around group into a nested <c>ORegex</c> and adds it
/// to the automaton as a single "#look" predicate transition from
/// <paramref name="start"/> to <paramref name="end"/>.
/// </summary>
/// <param name="start">State the look-around transition leaves from.</param>
/// <param name="end">State the look-around transition arrives at.</param>
/// <param name="fsa">Automaton that receives the transition.</param>
/// <param name="quantifier">Look-around quantifier supplying the direction and negation flags.</param>
/// <param name="concatNode">Node whose children form the look-around body.</param>
/// <param name="options">Options of the enclosing pattern; only <c>ReversePattern</c> is inspected here.</param>
private void EvaluateLook(int start, int end, FSA<TValue> fsa, LookAheadQuantifier quantifier, AstConcatNode concatNode, ORegexOptions options)
{
    // When the enclosing pattern is reversed, the direction of the look-around is flipped.
    bool lookBehind;
    if (options.HasFlag(ORegexOptions.ReversePattern))
    {
        lookBehind = !quantifier.IsBehind;
    }
    else
    {
        lookBehind = quantifier.IsBehind;
    }

    // A look-behind body is compiled with RightToLeft; a look-ahead uses no options.
    ORegexOptions nestedOptions;
    if (lookBehind)
    {
        nestedOptions = ORegexOptions.RightToLeft;
    }
    else
    {
        nestedOptions = ORegexOptions.None;
    }

    // Re-wrap the children in a plain concat node so the nested pattern
    // is not itself evaluated as a group with a quantifier.
    var body = new AstConcatNode(concatNode.Children, concatNode.Range);

    // NOTE(review): the (true, false) arguments are presumably matchBegin/matchEnd - confirm
    // against the AstRootNode constructor.
    var nestedRoot = new AstRootNode(body, true, false, body.Range, new[] { ORegexAstFactory<TValue>.MainCaptureName });

    var nestedAutomaton = Create(nestedRoot, nestedOptions);
    var nestedRegex = new ORegex<TValue>(nestedAutomaton, nestedOptions);
    var lookEdge = new ORegexPredicateEdge<TValue>("#look", nestedRegex, quantifier.IsNegative, lookBehind);

    EvaluateCondition(start, end, fsa, lookEdge);
}
/// <summary>
/// Builds the automaton fragment for a repetition node: <c>MinCount</c> mandatory
/// copies of the argument followed by either an unbounded tail
/// (<c>MaxCount == int.MaxValue</c>) or <c>MaxCount - MinCount</c> optional copies.
/// </summary>
/// <param name="start">State the fragment starts from.</param>
/// <param name="end">State the fragment must finish in.</param>
/// <param name="fsa">Automaton that receives the new states and transitions.</param>
/// <param name="astRepeatNode">Repetition node (argument, bounds and laziness).</param>
/// <param name="options">Options forwarded to the evaluation of the argument.</param>
private void EvaluateRepeat(int start, int end, FSA<TValue> fsa, AstRepeatNode astRepeatNode, ORegexOptions options)
{
    var toRepeat = astRepeatNode.Argument;
    var prev = start;

    // Mandatory part: chain MinCount copies of the argument through fresh states.
    for (int i = 0; i < astRepeatNode.MinCount; i++)
    {
        var next = CreateNewState(fsa);
        Evaluate(prev, next, fsa, toRepeat, options);
        prev = next;
    }

    if (astRepeatNode.MaxCount == int.MaxValue)
    {
        RepeatZeroOrInfinite(prev, end, fsa, toRepeat, astRepeatNode.IsLazy, options);
        return;
    }

    int optionalCount = astRepeatNode.MaxCount - astRepeatNode.MinCount;
    if (optionalCount <= 0)
    {
        // Exact repetition ({n} / {n,n}): no optional copies are allowed, so just link the
        // end of the mandatory chain to the target state. Previously one optional copy was
        // always appended, which made {n} accept n + 1 repetitions as well.
        fsa.AddEpsilonTransition(prev, end);
        return;
    }

    // Optional part: every copy but the last one goes through a fresh intermediate state...
    for (int i = 0; i < optionalCount - 1; i++)
    {
        var next = CreateNewState(fsa);
        RepeatZeroOrOne(prev, next, fsa, toRepeat, astRepeatNode.IsLazy, options);
        prev = next;
    }

    // ...and the last optional copy lands directly on the target state.
    RepeatZeroOrOne(prev, end, fsa, toRepeat, astRepeatNode.IsLazy, options);
}
/// <summary>
/// Dispatches an AST node to the matching builder method, which adds the node's
/// automaton fragment between <paramref name="start"/> and <paramref name="end"/>.
/// </summary>
/// <param name="start">State the fragment starts from.</param>
/// <param name="end">State the fragment must finish in.</param>
/// <param name="fsa">Automaton being built.</param>
/// <param name="node">AST node to translate.</param>
/// <param name="options">Options forwarded to the node-specific builders.</param>
/// <exception cref="NotImplementedException">The node type is not one of the handled kinds.</exception>
public void Evaluate(int start, int end, FSA<TValue> fsa, AstNodeBase node, ORegexOptions options)
{
    // The checks run in the same order as the original type tests.
    var atom = node as AstAtomNode<TValue>;
    if (atom != null)
    {
        EvaluateAtom(start, end, fsa, atom);
        return;
    }

    var concat = node as AstConcatNode;
    if (concat != null)
    {
        EvaluateConcat(start, end, fsa, concat, options);
        return;
    }

    var or = node as AstOrNode;
    if (or != null)
    {
        EvaluateOr(start, end, fsa, or, options);
        return;
    }

    var repeat = node as AstRepeatNode;
    if (repeat != null)
    {
        EvaluateRepeat(start, end, fsa, repeat, options);
        return;
    }

    var root = node as AstRootNode;
    if (root != null)
    {
        EvaluateRoot(start, end, fsa, root, options);
        return;
    }

    throw new NotImplementedException(node.GetType().Name);
}
/// <summary>
/// Builds an alternation: every child of the node is evaluated between the same
/// <paramref name="start"/> and <paramref name="end"/> states, in child order.
/// </summary>
/// <param name="start">Shared start state of all alternatives.</param>
/// <param name="end">Shared end state of all alternatives.</param>
/// <param name="fsa">Automaton being built.</param>
/// <param name="node">Alternation node whose children are the alternatives.</param>
/// <param name="options">Options forwarded to the evaluation of each alternative.</param>
private void EvaluateOr(int start, int end, FSA<TValue> fsa, AstOrNode node, ORegexOptions options)
{
    var alternatives = node.GetChildren().ToArray();
    for (int i = 0; i < alternatives.Length; i++)
    {
        Evaluate(start, end, fsa, alternatives[i], options);
    }
}
/// <summary>
/// Builds a new automaton in which every transition of <paramref name="fsa"/> is flipped
/// (begin and end states exchanged) and the final-state and start-state sets trade places.
/// Warning: the type of automaton can change from NFA to DFA or from DFA to NFA.
/// </summary>
/// <param name="fsa">Automaton to reverse; it is only read.</param>
/// <returns>
/// A new automaton with the same name, <c>ExactBegin</c>, <c>ExactEnd</c> and
/// <c>CaptureNames</c> as <paramref name="fsa"/>.
/// </returns>
public FSA<TValue> ReverseFsa(FSA<TValue> fsa)
{
    var flippedTransitions = fsa.Transitions.Select(
        t => new FSATransition<TValue>(t.EndState, t.Condition, t.BeginState));

    // F and Q0 are handed over in swapped roles.
    // NOTE(review): presumably the constructor takes start states before final states - confirm.
    var reversed = new FSA<TValue>(fsa.Name, flippedTransitions, fsa.F, fsa.Q0);
    reversed.ExactBegin = fsa.ExactBegin;
    reversed.ExactEnd = fsa.ExactEnd;
    reversed.CaptureNames = fsa.CaptureNames;
    return reversed;
}
/// <summary>
/// Builds an optional occurrence of <paramref name="node"/> between
/// <paramref name="start"/> and <paramref name="end"/>: one path through the node
/// and one epsilon path that skips it.
/// </summary>
/// <param name="start">State the fragment starts from.</param>
/// <param name="end">State the fragment must finish in.</param>
/// <param name="fsa">Automaton being built.</param>
/// <param name="node">Node that may occur zero or one time.</param>
/// <param name="isLasy">
/// When true the skipping epsilon transition is added before the node's transitions;
/// otherwise it is added after them.
/// </param>
/// <param name="options">Options forwarded to the evaluation of the node.</param>
private void RepeatZeroOrOne(int start, int end, FSA<TValue> fsa, AstNodeBase node, bool isLasy, ORegexOptions options)
{
    // NOTE(review): the insertion order presumably encodes match priority (lazy vs greedy) - confirm.
    if (!isLasy)
    {
        Evaluate(start, end, fsa, node, options);
        fsa.AddEpsilonTransition(start, end);
        return;
    }

    fsa.AddEpsilonTransition(start, end);
    Evaluate(start, end, fsa, node, options);
}
/// <summary>
/// Translates an AST root into a fresh automaton with one start state and one final state.
/// The result is returned as built by <see cref="Evaluate"/>, with no further conversion.
/// </summary>
/// <param name="root">Root of the parsed pattern; its first capture group name becomes the automaton name.</param>
/// <param name="options">Options forwarded to the evaluation of the tree.</param>
/// <returns>The newly built automaton.</returns>
public FSA<TValue> CreateRawFsa(AstRootNode root, ORegexOptions options)
{
    var automaton = new FSA<TValue>(root.CaptureGroupNames[0]);
    automaton.CaptureNames = root.CaptureGroupNames;

    int entry = automaton.NewState();
    int exit = automaton.NewState();

    Evaluate(entry, exit, automaton, root, options);

    automaton.AddFinal(exit);
    automaton.AddStart(entry);
    return automaton;
}
/// <summary>
/// Builds a concatenation: the children of <paramref name="node"/> are chained one after
/// another between <paramref name="start"/> and <paramref name="end"/>.
/// When the node is a group with a quantifier, a capture group is additionally wrapped in
/// "#capture" transitions, and a look-around group is delegated to <see cref="EvaluateLook"/>.
/// </summary>
/// <param name="start">State the fragment starts from.</param>
/// <param name="end">State the fragment must finish in.</param>
/// <param name="fsa">Automaton being built.</param>
/// <param name="node">Concatenation (or group) node to translate.</param>
/// <param name="options">Options forwarded to the evaluation of the children.</param>
private void EvaluateConcat(int start, int end, FSA<TValue> fsa, AstConcatNode node, ORegexOptions options)
{
    var group = node as AstGroupNode;
    if (group != null && group.Quantifier != null)
    {
        var captureQ = group.Quantifier as CaptureQuantifier;
        if (captureQ != null)
        {
            // The same capture edge instance marks both the opening and the closing
            // of the group; the body is built between the two inner states.
            var sys = new SystemPredicateEdge<TValue>("#capture")
            {
                IsCapture = true,
                CaptureName = captureQ.CaptureName,
                CaptureId = captureQ.CaptureId
            };

            var startTmp = CreateNewState(fsa);
            fsa.AddTransition(start, sys, startTmp);
            start = startTmp;

            var endTmp = CreateNewState(fsa);
            fsa.AddTransition(endTmp, sys, end);
            end = endTmp;
        }
        else
        {
            var lookQ = group.Quantifier as LookAheadQuantifier;
            if (lookQ != null)
            {
                // A look-around becomes a single predicate transition; nothing to chain.
                EvaluateLook(start, end, fsa, lookQ, group, options);
                return;
            }
        }
    }

    var children = node.GetChildren().ToArray();
    if (children.Length == 0)
    {
        // An empty sequence matches nothing-at-all: link start to end directly instead of
        // indexing children[-1], which previously threw IndexOutOfRangeException.
        fsa.AddEpsilonTransition(start, end);
        return;
    }

    // Every child but the last one ends in a fresh intermediate state...
    var prev = start;
    for (int i = 0; i < children.Length - 1; i++)
    {
        var next = CreateNewState(fsa);
        Evaluate(prev, next, fsa, children[i], options);
        prev = next;
    }

    // ...and the last child lands on the target state.
    Evaluate(prev, end, fsa, children[children.Length - 1], options);
}
/// <summary>
/// Creates an array-backed copy of <paramref name="fsa"/>: transitions are grouped by their
/// begin state into a jagged array indexed by state, and final states are stored in a
/// boolean lookup table.
/// </summary>
/// <param name="fsa">Source automaton. The first element of its <c>Q0</c> is used as the start state.</param>
public CFSA(FSA<TValue> fsa)
{
    ExactBegin = fsa.ExactBegin;
    ExactEnd = fsa.ExactEnd;
    Name = fsa.Name;
    CaptureNames = fsa.CaptureNames;

    // One row per state; rows of states without outgoing transitions stay null.
    _transitionMatrix = new IFSATransition<TValue>[fsa.StateCount][];
    foreach (var outgoing in fsa.Transitions.GroupBy(t => t.BeginState))
    {
        _transitionMatrix[outgoing.Key] = outgoing.ToArray();
    }

    _startState = fsa.Q0.First();

    _finalsLookup = new bool[_transitionMatrix.Length];
    foreach (var finalState in fsa.F)
    {
        _finalsLookup[finalState] = true;
    }
}
/// <summary>
/// Builds an unbounded repetition of <paramref name="predicate"/>: a new loop state carries
/// the node as a self-loop and is connected by epsilon transitions from
/// <paramref name="start"/> and to <paramref name="end"/>; a direct epsilon transition
/// from start to end allows zero occurrences.
/// </summary>
/// <param name="start">State the fragment starts from.</param>
/// <param name="end">State the fragment must finish in.</param>
/// <param name="fsa">Automaton being built.</param>
/// <param name="predicate">Node to repeat.</param>
/// <param name="isLasy">Selects the order in which the transitions are added.</param>
/// <param name="options">Options forwarded to the evaluation of the node.</param>
private void RepeatZeroOrInfinite(int start, int end, FSA<TValue> fsa, AstNodeBase predicate, bool isLasy, ORegexOptions options)
{
    int loop = CreateNewState(fsa);

    // NOTE(review): the insertion order presumably encodes match priority (lazy vs greedy) - confirm.
    if (!isLasy)
    {
        Evaluate(loop, loop, fsa, predicate, options);
        fsa.AddEpsilonTransition(loop, end);
        fsa.AddEpsilonTransition(start, loop);
        fsa.AddEpsilonTransition(start, end);
        return;
    }

    fsa.AddEpsilonTransition(start, end);
    fsa.AddEpsilonTransition(loop, end);
    Evaluate(loop, loop, fsa, predicate, options);
    fsa.AddEpsilonTransition(start, loop);
}
/// <summary>
/// Extends <paramref name="states"/> with every state reachable from it through
/// epsilon transitions only.
/// </summary>
/// <param name="nfa">Automaton whose transitions are followed.</param>
/// <param name="states">
/// Seed states. This set is modified in place: reachable states are added to it.
/// </param>
/// <returns>The same set instance as <paramref name="states"/>, now holding the closure.</returns>
private static Set<int> EpsilonClosure(FSA<TValue> nfa, Set<int> states)
{
    // Work-list of states whose outgoing edges still have to be inspected.
    var pending = new Stack<int>(states);

    // The closure is accumulated directly in the caller's set (no copy is made).
    var closure = states;

    while (pending.Count > 0)
    {
        int current = pending.Pop();

        IEnumerable<FSATransition<TValue>> outgoing;
        if (!nfa.TryGetTransitionsFrom(current, out outgoing))
        {
            continue;
        }

        foreach (var edge in outgoing)
        {
            if (!PredicateEdgeBase<TValue>.IsEpsilon(edge.Condition))
            {
                continue;
            }

            int target = edge.EndState;
            if (closure.Contains(target))
            {
                continue;
            }

            // Newly discovered state: record it and schedule its own edges.
            closure.Add(target);
            pending.Push(target);
        }
    }

    return closure;
}
/// <summary>
/// Converts any FSA into a DFA for the reversed automaton: the input is reversed with
/// <see cref="ReverseFsa"/> and the result is determinized with <c>ToDfa</c>.
/// Warning: eliminates any epsilon transition.
/// </summary>
/// <param name="fsa">Automaton to reverse and determinize.</param>
/// <returns>The determinized reversed automaton.</returns>
public FSA<TValue> RotateFsa(FSA<TValue> fsa)
{
    var reversed = ReverseFsa(fsa);
    return ToDfa(reversed);
}
/// <summary>
/// Converts any FSA to a minimized DFA by applying <see cref="RotateFsa"/>
/// (reverse, then determinize) twice in a row.
/// Warning: eliminates any epsilon transition.
/// </summary>
/// <param name="fsa">Automaton to minimize.</param>
/// <returns>The automaton obtained after the second reverse-and-determinize pass.</returns>
public FSA<TValue> MinimizeFsa(FSA<TValue> fsa)
{
    // The first pass yields a DFA for the reversed automaton; the second restores the direction.
    return RotateFsa(RotateFsa(fsa));
}
/// <summary>
/// Handles the root node: copies its begin/end anchoring flags onto the automaton and
/// evaluates the wrapped pattern between <paramref name="start"/> and <paramref name="end"/>.
/// </summary>
/// <param name="start">State the pattern starts from.</param>
/// <param name="end">State the pattern must finish in.</param>
/// <param name="fsa">Automaton being built; its <c>ExactBegin</c> and <c>ExactEnd</c> are overwritten.</param>
/// <param name="astRootNode">Root node carrying the flags and the pattern.</param>
/// <param name="options">Options forwarded to the evaluation of the pattern.</param>
private void EvaluateRoot(int start, int end, FSA<TValue> fsa, AstRootNode astRootNode, ORegexOptions options)
{
    fsa.ExactBegin = astRootNode.MatchBegin;
    fsa.ExactEnd = astRootNode.MatchEnd;

    var pattern = astRootNode.Regex;
    Evaluate(start, end, fsa, pattern, options);
}
/// <summary>
/// Adds a single transition guarded by <paramref name="condition"/> from
/// <paramref name="start"/> to <paramref name="end"/>.
/// </summary>
/// <param name="start">State the transition leaves from.</param>
/// <param name="end">State the transition arrives at.</param>
/// <param name="fsa">Automaton that receives the transition.</param>
/// <param name="condition">Predicate edge attached to the transition.</param>
private void EvaluateCondition(int start, int end, FSA <TValue> fsa, PredicateEdgeBase <TValue> condition) { fsa.AddTransition(start, condition, end); }
/// <summary>
/// Translates an atom node into one transition from <paramref name="start"/> to
/// <paramref name="end"/> guarded by the atom's condition.
/// </summary>
/// <param name="start">State the transition leaves from.</param>
/// <param name="end">State the transition arrives at.</param>
/// <param name="fsa">Automaton that receives the transition.</param>
/// <param name="node">Atom node supplying the condition.</param>
private void EvaluateAtom(int start, int end, FSA<TValue> fsa, AstAtomNode<TValue> node)
{
    var atomCondition = node.Condition;
    EvaluateCondition(start, end, fsa, atomCondition);
}
/// <summary>
/// Subset (powerset) construction: builds a DFA in which every state stands for an
/// epsilon-closed set of states of <paramref name="fsa"/>.
/// It creates a DFA that recognizes the same language as the given NFA.
/// </summary>
/// <param name="fsa">Source automaton; it is only read.</param>
/// <returns>
/// A new automaton with the same name, <c>ExactBegin</c>, <c>ExactEnd</c> and
/// <c>CaptureNames</c> as <paramref name="fsa"/>.
/// </returns>
private static FSA<TValue> ToDfa(FSA<TValue> fsa)
{
    var result = new FSA<TValue>(fsa.Name);
    result.ExactBegin = fsa.ExactBegin;
    result.ExactEnd = fsa.ExactEnd;
    result.CaptureNames = fsa.CaptureNames;

    // Subsets already expanded, and subsets discovered but not expanded yet.
    var processed = new HashSet<Set<int>>();
    var pending = new HashSet<Set<int>>();

    // DFA state number assigned to each discovered subset.
    var stateIds = new Dictionary<Set<int>, int>();

    // The closure of the source start states is the DFA start state.
    var seed = EpsilonClosure(fsa, fsa.Q0.ToSet());
    pending.Add(seed);

    int initialId = result.NewState();
    stateIds[seed] = initialId;
    result.AddStart(initialId);

    while (pending.Count > 0)
    {
        // Move one subset from the pending set to the processed set.
        var subset = pending.First();
        pending.Remove(subset);
        processed.Add(subset);

        int sourceId = stateIds[subset];

        // A subset containing any final state of the source is final in the DFA.
        if (fsa.F.Any(f => subset.Contains(f)))
        {
            result.AddFinal(sourceId);
        }

        foreach (var symbol in fsa.Sigma)
        {
            var target = EpsilonClosure(fsa, fsa.Move(subset, symbol));
            if (target.Count <= 0)
            {
                // No source state is reachable on this symbol: no DFA transition.
                continue;
            }

            // First sighting of this subset: queue it and give it a fresh DFA state.
            bool known = pending.Contains(target) || processed.Contains(target);
            if (!known)
            {
                pending.Add(target);
                stateIds.Add(target, result.NewState());
            }

            result.AddTransition(sourceId, symbol, stateIds[target]);
        }
    }

    return result;
}
/// <summary>
/// Allocates a new state in <paramref name="fsa"/>.
/// </summary>
/// <param name="fsa">Automaton that receives the state.</param>
/// <returns>The value returned by <c>fsa.NewState()</c>, used by callers as the state's number.</returns>
private int CreateNewState(FSA<TValue> fsa)
{
    int stateId = fsa.NewState();
    return stateId;
}