예제 #1
0
 //-------------------------------------------------------------------------
 //
 //       FindNodes() Locate all the nodes of the specified type, starting
 //                       at this node and descending through its children.
 //
 //-------------------------------------------------------------------------
 /// <summary>
 /// Collects every node whose <c>fType</c> equals <paramref name="kind"/> from the
 /// subtree rooted at this node. The current node is examined first, then the
 /// left child's subtree, then the right child's subtree.
 /// </summary>
 /// <param name="dest">List that matching nodes are appended to.</param>
 /// <param name="kind">Node type code to compare against <c>fType</c>.</param>
 internal virtual void FindNodes(IList<RBBINode> dest, int kind)
 {
     // Visit this node before descending so results come out in pre-order.
     if (kind == fType)
     {
         dest.Add(this);
     }

     // A missing child simply ends the descent on that side.
     fLeftChild?.FindNodes(dest, kind);
     fRightChild?.FindNodes(dest, kind);
 }
예제 #2
0
        //-----------------------------------------------------------------------------
        //
        //   CalcChainedFollowPos.    Modify the previously calculated followPos sets
        //                            to implement rule chaining.  NOT described by Aho
        //
        //-----------------------------------------------------------------------------
        /// <summary>
        /// Extends the already-computed followPos sets so that rules can chain:
        /// a leaf that can end a match inherits the followPos of every
        /// chain-in start leaf that has the same <c>fVal</c>.
        /// </summary>
        /// <param name="tree">Root of the parse tree whose leaf followPos sets are updated in place.</param>
        internal virtual void CalcChainedFollowPos(RBBINode tree)
        {
            // All end-marker nodes in the tree.
            IList<RBBINode> endMarks = new JCG.List<RBBINode>();
            tree.FindNodes(endMarks, RBBINode.endMark);

            // All leaf nodes in the tree.
            IList<RBBINode> leaves = new JCG.List<RBBINode>();
            tree.FindNodes(leaves, RBBINode.leafChar);

            // The nodes that can start a match for a rule with inbound chaining
            // enabled: the union of the firstPos sets of every rule root whose
            // fChainIn flag is set.
            IList<RBBINode> ruleRoots = new JCG.List<RBBINode>();
            AddRuleRootNodes(ruleRoots, tree);

            ISet<RBBINode> chainStarts = new JCG.HashSet<RBBINode>();
            foreach (RBBINode root in ruleRoots)
            {
                if (!root.fChainIn)
                {
                    continue;
                }
                chainStarts.UnionWith(root.fFirstPosSet);
            }

            foreach (RBBINode leaf in leaves)
            {
                // A leaf corresponds to an overall rule match position when its
                // followPos set contains at least one end-marker node.
                bool canEndMatch = false;
                foreach (RBBINode mark in endMarks)
                {
                    if (leaf.fFollowPos.Contains(mark))
                    {
                        canEndMatch = true;
                        break;
                    }
                }
                if (!canEndMatch)
                {
                    // Not an end node; move on to the next leaf.
                    continue;
                }

                // Line-break specific hack: when the flag is set, do not chain
                // from a leaf whose first character has the CombiningMark
                // Line_Break property value (the $CM class, per the original note).
                // TODO:  Add rule syntax for this behavior, get specifics out of here and
                //        into the rule file.
                if (fRB.fLBCMNoChain)
                {
                    int firstChar = this.fRB.fSetBuilder.GetFirstChar(leaf.fVal);

                    // -1 occurs with sets containing only the {eof} marker string;
                    // there is no character to look up in that case.
                    if (firstChar != -1)
                    {
                        int lineBreakValue = UChar.GetIntPropertyValue(firstChar, UProperty.Line_Break);
                        if (lineBreakValue == LineBreak.CombiningMark)
                        {
                            continue;
                        }
                    }
                }

                // For every leaf start node with the same val (character class) as
                // this ending leaf, merge the start node's followPos into the
                // ending leaf's followPos. That lets a match state at the ending
                // leaf transition to the second char of a match beginning with
                // the start node.
                foreach (RBBINode start in chainStarts)
                {
                    if (start.fType == RBBINode.leafChar && leaf.fVal == start.fVal)
                    {
                        leaf.fFollowPos.UnionWith(start.fFollowPos);
                    }
                }
            }
        }