/// <summary>
/// Builds the state strand for the pattern tree rooted at <paramref name="parent"/>,
/// links every end state of that strand to one newly created end-state forge,
/// numbers all states of the strand sequentially starting at zero, and returns
/// the start states together with the complete state list.
/// </summary>
/// <param name="parent">root pattern node to build states for</param>
/// <param name="variableDefinitions">definition expression per variable name</param>
/// <param name="variableStreams">per variable name, a pair of stream number and "multiple" flag</param>
/// <param name="exprRequiresMultimatchState">per stream number, whether multi-match state is required</param>
/// <returns>result holding a copy of the strand's start states and the strand's list of all states</returns>
protected internal static RowRecogNFAStrandResult BuildStartStates(
    RowRecogExprNode parent,
    IDictionary<string, ExprNode> variableDefinitions,
    IDictionary<string, Pair<int, bool>> variableStreams,
    bool[] exprRequiresMultimatchState)
{
    // The stack tracks the child positions visited on the way down the pattern tree.
    var positionStack = new Stack<int>();
    RowRecogNFAStrand rootStrand = RecursiveBuildStatesInternal(
        parent,
        variableDefinitions,
        variableStreams,
        positionStack,
        exprRequiresMultimatchState);

    // One shared terminal state, created with flat node number -1,
    // becomes a successor of every end state of the strand.
    var terminal = new RowRecogNFAStateEndForge { NodeNumFlat = -1 };
    foreach (RowRecogNFAStateForgeBase lastState in rootStrand.EndStates)
    {
        lastState.AddState(terminal);
    }

    // Flat node numbers are handed out in the order the states appear in AllStates.
    var nextFlatNumber = 0;
    foreach (RowRecogNFAStateForgeBase state in rootStrand.AllStates)
    {
        state.NodeNumFlat = nextFlatNumber;
        nextFlatNumber++;
    }

    var startStatesCopy = new List<RowRecogNFAStateForge>(rootStrand.StartStates);
    return new RowRecogNFAStrandResult(startStatesCopy, rootStrand.AllStates);
}
/// <summary>
/// Recursively turns a pattern node into a strand of state forges.
/// <para>
/// Alteration: the start, end and all-state lists of every child strand are merged;
/// the result is passthrough when any child strand is passthrough.
/// </para>
/// <para>
/// Concatenation: child strands are chained. Start states are collected from the front
/// up to and including the first non-passthrough strand, end states from the back down to
/// and including the last non-passthrough strand, and the end states of preceding strands
/// are connected to the start states of each following strand. The result is passthrough
/// only when every child strand is passthrough.
/// </para>
/// <para>
/// Nested: the single child strand is reused; for a multiple-matches type each end state
/// is additionally linked back to each start state.
/// </para>
/// <para>
/// Atom: a single state forge is created whose concrete class depends on the atom type
/// and on whether a definition expression exists for the atom's tag.
/// </para>
/// </summary>
/// <param name="node">pattern node to convert</param>
/// <param name="variableDefinitions">definition expression per variable name</param>
/// <param name="variableStreams">per variable name, a pair of stream number and "multiple" flag</param>
/// <param name="nodeNumStack">stack of child positions along the current path; pushed before and popped after each recursion</param>
/// <param name="exprRequiresMultimatchState">per stream number, whether multi-match state is required</param>
/// <returns>the strand built for <paramref name="node"/></returns>
private static RowRecogNFAStrand RecursiveBuildStatesInternal(
    RowRecogExprNode node,
    IDictionary<string, ExprNode> variableDefinitions,
    IDictionary<string, Pair<int, bool>> variableStreams,
    Stack<int> nodeNumStack,
    bool[] exprRequiresMultimatchState)
{
    if (node is RowRecogExprNodeAlteration)
    {
        IList<RowRecogNFAStateForgeBase> mergedStartStates = new List<RowRecogNFAStateForgeBase>();
        IList<RowRecogNFAStateForgeBase> mergedAllStates = new List<RowRecogNFAStateForgeBase>();
        IList<RowRecogNFAStateForgeBase> mergedEndStates = new List<RowRecogNFAStateForgeBase>();
        var anyBranchPassthrough = false;

        var alternatives = node.ChildNodes;
        for (var branch = 0; branch < alternatives.Count; branch++)
        {
            nodeNumStack.Push(branch);
            var branchStrand = RecursiveBuildStatesInternal(
                alternatives[branch],
                variableDefinitions,
                variableStreams,
                nodeNumStack,
                exprRequiresMultimatchState);
            nodeNumStack.Pop();

            mergedStartStates.AddAll(branchStrand.StartStates);
            mergedAllStates.AddAll(branchStrand.AllStates);
            mergedEndStates.AddAll(branchStrand.EndStates);
            anyBranchPassthrough |= branchStrand.IsPassthrough;
        }

        return new RowRecogNFAStrand(
            mergedStartStates,
            mergedEndStates,
            mergedAllStates,
            anyBranchPassthrough);
    }

    if (node is RowRecogExprNodeConcatenation)
    {
        var parts = node.ChildNodes;
        var partStrands = new RowRecogNFAStrand[parts.Count];
        IList<RowRecogNFAStateForgeBase> everyState = new List<RowRecogNFAStateForgeBase>();
        var allPartsPassthrough = true;

        for (var position = 0; position < partStrands.Length; position++)
        {
            nodeNumStack.Push(position);
            var partStrand = RecursiveBuildStatesInternal(
                parts[position],
                variableDefinitions,
                variableStreams,
                nodeNumStack,
                exprRequiresMultimatchState);
            nodeNumStack.Pop();

            partStrands[position] = partStrand;
            everyState.AddAll(partStrand.AllStates);
            allPartsPassthrough &= partStrand.IsPassthrough;
        }

        // Start states: walk forward, stopping after the first non-passthrough strand.
        IList<RowRecogNFAStateForgeBase> leadingStates = new List<RowRecogNFAStateForgeBase>();
        foreach (var partStrand in partStrands)
        {
            leadingStates.AddAll(partStrand.StartStates);
            if (!partStrand.IsPassthrough)
            {
                break;
            }
        }

        // End states: walk backward, stopping after the last non-passthrough strand.
        IList<RowRecogNFAStateForgeBase> trailingStates = new List<RowRecogNFAStateForgeBase>();
        for (var position = partStrands.Length - 1; position >= 0; position--)
        {
            var partStrand = partStrands[position];
            trailingStates.AddAll(partStrand.EndStates);
            if (!partStrand.IsPassthrough)
            {
                break;
            }
        }

        // Wire each strand to its predecessors: going backward from the strand, every
        // predecessor's end states get the strand's start states as successors, until a
        // non-passthrough predecessor has been connected.
        for (var later = partStrands.Length - 1; later >= 1; later--)
        {
            var successor = partStrands[later];
            for (var earlier = later - 1; earlier >= 0; earlier--)
            {
                var predecessor = partStrands[earlier];
                foreach (RowRecogNFAStateForgeBase tail in predecessor.EndStates)
                {
                    foreach (RowRecogNFAStateForgeBase head in successor.StartStates)
                    {
                        tail.AddState(head);
                    }
                }

                if (!predecessor.IsPassthrough)
                {
                    break;
                }
            }
        }

        return new RowRecogNFAStrand(leadingStates, trailingStates, everyState, allPartsPassthrough);
    }

    if (node is RowRecogExprNodeNested nestedNode)
    {
        nodeNumStack.Push(0);
        var innerStrand = RecursiveBuildStatesInternal(
            node.ChildNodes[0],
            variableDefinitions,
            variableStreams,
            nodeNumStack,
            exprRequiresMultimatchState);
        nodeNumStack.Pop();

        var nestedPassthrough = innerStrand.IsPassthrough || nestedNode.Type.IsOptional();

        // A repeating group loops back: each end state gains each start state as a
        // successor unless that link is already present.
        if (nestedNode.Type.IsMultipleMatches())
        {
            foreach (RowRecogNFAStateForgeBase tail in innerStrand.EndStates)
            {
                foreach (RowRecogNFAStateForgeBase head in innerStrand.StartStates)
                {
                    if (tail.NextStates.Contains(head))
                    {
                        continue;
                    }

                    tail.NextStates.Add(head);
                }
            }
        }

        return new RowRecogNFAStrand(
            innerStrand.StartStates,
            innerStrand.EndStates,
            innerStrand.AllStates,
            nestedPassthrough);
    }

    // Anything that is not an alteration, concatenation or nested node is cast to an atom.
    var atom = (RowRecogExprNodeAtom) node;
    var tag = atom.Tag;

    // The stream pair supplies the stream number (First) and the "multiple" flag (Second).
    var streamInfo = variableStreams.Get(tag);
    var streamNum = streamInfo.First;
    var multiple = streamInfo.Second;
    var expression = variableDefinitions.Get(tag);
    var requiresMultimatch = exprRequiresMultimatchState[streamNum];

    // Text form of the current position stack, passed as the first constructor argument of the forge.
    var nodeLabel = ToString(nodeNumStack);
    var quantifier = atom.Type;

    RowRecogNFAStateForgeBase atomState;
    if (quantifier == RowRecogNFATypeEnum.ZERO_TO_MANY ||
        quantifier == RowRecogNFATypeEnum.ZERO_TO_MANY_RELUCTANT)
    {
        atomState = new RowRecogNFAStateZeroToManyForge(
            nodeLabel,
            tag,
            streamNum,
            multiple,
            quantifier.IsGreedy(),
            requiresMultimatch,
            expression);
    }
    else if (quantifier == RowRecogNFATypeEnum.ONE_TO_MANY ||
             quantifier == RowRecogNFATypeEnum.ONE_TO_MANY_RELUCTANT)
    {
        atomState = new RowRecogNFAStateOneToManyForge(
            nodeLabel,
            tag,
            streamNum,
            multiple,
            quantifier.IsGreedy(),
            requiresMultimatch,
            expression);
    }
    else if (quantifier == RowRecogNFATypeEnum.ONE_OPTIONAL ||
             quantifier == RowRecogNFATypeEnum.ONE_OPTIONAL_RELUCTANT)
    {
        atomState = new RowRecogNFAStateOneOptionalForge(
            nodeLabel,
            tag,
            streamNum,
            multiple,
            quantifier.IsGreedy(),
            requiresMultimatch,
            expression);
    }
    else if (expression == null)
    {
        // No definition expression for this tag: use the any-one state.
        atomState = new RowRecogNFAStateAnyOneForge(nodeLabel, tag, streamNum, multiple);
    }
    else
    {
        atomState = new RowRecogNFAStateFilterForge(
            nodeLabel,
            tag,
            streamNum,
            multiple,
            requiresMultimatch,
            expression);
    }

    // The single state is at once the start, end and only member of its strand;
    // three separate singleton lists are created, one per role.
    return new RowRecogNFAStrand(
        Collections.SingletonList(atomState),
        Collections.SingletonList(atomState),
        Collections.SingletonList(atomState),
        quantifier.IsOptional());
}