예제 #1
0
        /// <summary>
        ///     Builds the strand of NFA states for the pattern rooted at <paramref name="parent" />,
        ///     connects every end state of that strand to one shared end state and numbers all
        ///     states of the strand consecutively, starting at zero.
        /// </summary>
        /// <param name="parent">root pattern node for which the states are built</param>
        /// <param name="variableDefinitions">expression per variable name</param>
        /// <param name="variableStreams">pair of stream number and multiple-flag per variable name</param>
        /// <param name="exprRequiresMultimatchState">flags indexed by stream number, handed to the created states</param>
        /// <returns>the start states (copied into a new list) together with all states of the strand</returns>
        protected internal static RowRecogNFAStrandResult BuildStartStates(
            RowRecogExprNode parent,
            IDictionary<string, ExprNode> variableDefinitions,
            IDictionary<string, Pair<int, bool>> variableStreams,
            bool[] exprRequiresMultimatchState
        )
        {
            // The recursion tracks the child position of each nesting level on this stack.
            var strand = RecursiveBuildStatesInternal(
                parent,
                variableDefinitions,
                variableStreams,
                new Stack<int>(),
                exprRequiresMultimatchState);

            // A single end state, created with flat number -1, follows every end state of the strand.
            var terminal = new RowRecogNFAStateEndForge {
                NodeNumFlat = -1
            };
            foreach (RowRecogNFAStateForgeBase lastState in strand.EndStates) {
                lastState.AddState(terminal);
            }

            // Flat node numbers are simply the position of each state within AllStates.
            var nextNumber = 0;
            foreach (RowRecogNFAStateForgeBase state in strand.AllStates) {
                state.NodeNumFlat = nextNumber;
                nextNumber++;
            }

            var startStates = new List<RowRecogNFAStateForge>(strand.StartStates);
            return new RowRecogNFAStrandResult(startStates, strand.AllStates);
        }
예제 #2
0
        /// <summary>
        ///     Recursively turns a pattern node into a strand of NFA states.
        ///     Alteration, concatenation and nested nodes combine the strands built for their
        ///     children; any other node is cast to an atom and yields a single state.
        /// </summary>
        /// <param name="node">pattern node to convert</param>
        /// <param name="variableDefinitions">expression per variable name; a missing expression produces an any-one state for plain atoms</param>
        /// <param name="variableStreams">pair of stream number and multiple-flag per variable name</param>
        /// <param name="nodeNumStack">child positions of the enclosing nodes; rendered into the name of each atom state</param>
        /// <param name="exprRequiresMultimatchState">flags indexed by the stream number of the atom's variable</param>
        /// <returns>strand consisting of start states, end states, all states and the pass-through flag</returns>
        private static RowRecogNFAStrand RecursiveBuildStatesInternal(
            RowRecogExprNode node,
            IDictionary<string, ExprNode> variableDefinitions,
            IDictionary<string, Pair<int, bool>> variableStreams,
            Stack<int> nodeNumStack,
            bool[] exprRequiresMultimatchState
        )
        {
            // ---- alternation: the union of all branch strands ----
            if (node is RowRecogExprNodeAlteration) {
                IList<RowRecogNFAStateForgeBase> branchStarts = new List<RowRecogNFAStateForgeBase>();
                IList<RowRecogNFAStateForgeBase> branchStates = new List<RowRecogNFAStateForgeBase>();
                IList<RowRecogNFAStateForgeBase> branchEnds = new List<RowRecogNFAStateForgeBase>();

                // pass-through as soon as one branch is pass-through
                var anyBranchPassthrough = false;

                var branches = node.ChildNodes;
                for (var branchNum = 0; branchNum < branches.Count; branchNum++) {
                    nodeNumStack.Push(branchNum);
                    var branchStrand = RecursiveBuildStatesInternal(
                        branches[branchNum],
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                    nodeNumStack.Pop();

                    branchStarts.AddAll(branchStrand.StartStates);
                    branchStates.AddAll(branchStrand.AllStates);
                    branchEnds.AddAll(branchStrand.EndStates);
                    anyBranchPassthrough |= branchStrand.IsPassthrough;
                }

                return new RowRecogNFAStrand(branchStarts, branchEnds, branchStates, anyBranchPassthrough);
            }

            // ---- concatenation: child strands chained one after the other ----
            if (node is RowRecogExprNodeConcatenation) {
                // pass-through only when every part is pass-through
                var everyPartPassthrough = true;
                IList<RowRecogNFAStateForgeBase> chainedStates = new List<RowRecogNFAStateForgeBase>();

                var parts = node.ChildNodes;
                var partStrands = new RowRecogNFAStrand[parts.Count];
                for (var partNum = 0; partNum < partStrands.Length; partNum++) {
                    nodeNumStack.Push(partNum);
                    var built = RecursiveBuildStatesInternal(
                        parts[partNum],
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                    nodeNumStack.Pop();
                    partStrands[partNum] = built;

                    chainedStates.AddAll(built.AllStates);
                    everyPartPassthrough &= built.IsPassthrough;
                }

                // start states: walk forward and stop after the first part that is not pass-through
                IList<RowRecogNFAStateForgeBase> startStates = new List<RowRecogNFAStateForgeBase>();
                foreach (var leading in partStrands) {
                    startStates.AddAll(leading.StartStates);
                    if (!leading.IsPassthrough) {
                        break;
                    }
                }

                // end states: walk backward and stop after the first part that is not pass-through
                IList<RowRecogNFAStateForgeBase> endStates = new List<RowRecogNFAStateForgeBase>();
                for (var tailNum = partStrands.Length - 1; tailNum >= 0; tailNum--) {
                    var trailing = partStrands[tailNum];
                    endStates.AddAll(trailing.EndStates);
                    if (!trailing.IsPassthrough) {
                        break;
                    }
                }

                // Link parts from the back: the start states of each part follow the end states of the
                // directly preceding part, and of earlier parts for as long as the parts in between
                // are pass-through.
                for (var targetNum = partStrands.Length - 1; targetNum >= 1; targetNum--) {
                    var target = partStrands[targetNum];

                    // targetNum >= 1 guarantees that at least one preceding part exists
                    var sourceNum = targetNum - 1;
                    bool sourceSkippable;
                    do {
                        var source = partStrands[sourceNum];

                        foreach (RowRecogNFAStateForgeBase exit in source.EndStates) {
                            foreach (RowRecogNFAStateForgeBase entry in target.StartStates) {
                                exit.AddState(entry);
                            }
                        }

                        sourceSkippable = source.IsPassthrough;
                        sourceNum--;
                    } while (sourceSkippable && sourceNum >= 0);
                }

                return new RowRecogNFAStrand(startStates, endStates, chainedStates, everyPartPassthrough);
            }

            // ---- nested group: the strand of its only child, optionally looping ----
            if (node is RowRecogExprNodeNested) {
                var group = (RowRecogExprNodeNested) node;

                nodeNumStack.Push(0);
                var inner = RecursiveBuildStatesInternal(
                    node.ChildNodes[0],
                    variableDefinitions,
                    variableStreams,
                    nodeNumStack,
                    exprRequiresMultimatchState);
                nodeNumStack.Pop();

                var groupPassthrough = inner.IsPassthrough || group.Type.IsOptional();

                // a repeating group loops back: every end state leads to every start state (added once only)
                if (group.Type.IsMultipleMatches()) {
                    foreach (RowRecogNFAStateForgeBase exit in inner.EndStates) {
                        foreach (RowRecogNFAStateForgeBase entry in inner.StartStates) {
                            var successors = exit.NextStates;
                            if (successors.Contains(entry)) {
                                continue;
                            }

                            successors.Add(entry);
                        }
                    }
                }

                return new RowRecogNFAStrand(inner.StartStates, inner.EndStates, inner.AllStates, groupPassthrough);
            }

            // ---- atom: exactly one state ----
            var atom = (RowRecogExprNodeAtom) node;
            var tag = atom.Tag;

            // NOTE(review): the previous comment here said multiple-variables get stream number -1,
            // yet streamNum is used as an array index below - confirm against the caller.
            var streamInfo = variableStreams.Get(tag);
            var streamNum = streamInfo.First;
            var multiple = streamInfo.Second;
            var expression = variableDefinitions.Get(tag);
            var exprRequiresMultimatch = exprRequiresMultimatchState[streamNum];

            var stateName = ToString(nodeNumStack);
            var quantifier = atom.Type;

            RowRecogNFAStateForgeBase state;
            if (quantifier == RowRecogNFATypeEnum.ZERO_TO_MANY ||
                quantifier == RowRecogNFATypeEnum.ZERO_TO_MANY_RELUCTANT) {
                state = new RowRecogNFAStateZeroToManyForge(
                    stateName,
                    tag,
                    streamNum,
                    multiple,
                    quantifier.IsGreedy(),
                    exprRequiresMultimatch,
                    expression);
            }
            else if (quantifier == RowRecogNFATypeEnum.ONE_TO_MANY ||
                     quantifier == RowRecogNFATypeEnum.ONE_TO_MANY_RELUCTANT) {
                state = new RowRecogNFAStateOneToManyForge(
                    stateName,
                    tag,
                    streamNum,
                    multiple,
                    quantifier.IsGreedy(),
                    exprRequiresMultimatch,
                    expression);
            }
            else if (quantifier == RowRecogNFATypeEnum.ONE_OPTIONAL ||
                     quantifier == RowRecogNFATypeEnum.ONE_OPTIONAL_RELUCTANT) {
                state = new RowRecogNFAStateOneOptionalForge(
                    stateName,
                    tag,
                    streamNum,
                    multiple,
                    quantifier.IsGreedy(),
                    exprRequiresMultimatch,
                    expression);
            }
            else if (expression == null) {
                // no expression for the variable: any-one state
                state = new RowRecogNFAStateAnyOneForge(stateName, tag, streamNum, multiple);
            }
            else {
                state = new RowRecogNFAStateFilterForge(
                    stateName,
                    tag,
                    streamNum,
                    multiple,
                    exprRequiresMultimatch,
                    expression);
            }

            // the single state is start, end and complete state list at once; each role gets its own list
            var onlyStart = Collections.SingletonList(state);
            var onlyEnd = Collections.SingletonList(state);
            var onlyState = Collections.SingletonList(state);
            return new RowRecogNFAStrand(onlyStart, onlyEnd, onlyState, quantifier.IsOptional());
        }