Esempio n. 1
0
        // -----------------------------------------------------------------------------
        //
        // bofFixup. Fixup for state tables that include {bof} beginning of input
        // testing.
        // Do a swizzle similar to chaining, modifying the followPos set of
        // the bofNode to include the followPos nodes from other {bof} nodes
        // scattered through the tree.
        //
        // This function has much in common with calcChainedFollowPos().
        //
        // -----------------------------------------------------------------------------
        /// <summary>
        /// Copies the followPos sets of every explicitly written {bof} leaf that can
        /// start a match into the followPos set of the synthetic {bof} node sitting
        /// at the front of the parse tree.
        /// </summary>
        internal void BofFixup()
        {
            // Expected shape of the parse tree:
            //
            //        root: <cat>
            //              /   \
            //          <cat>   <#end node>
            //          /   \
            //  <bofNode>   rest of tree (user-written rules)
            //
            // Only the followPos set of <bofNode> is modified here.
            RBBINode ruleCat = fRB.fTreeRoots[fRootIx].fLeftChild;
            RBBINode bofNode = ruleCat.fLeftChild;

            // Sanity checks on the synthetic node before touching the rest of the tree.
            IBM.ICU.Impl.Assert.Assrt(bofNode.fType == IBM.ICU.Text.RBBINode.leafChar);
            IBM.ICU.Impl.Assert.Assrt(bofNode.fVal == 2);

            // Positions that can begin a match of the user-written rules, i.e. the
            // firstPos set of the "rest of tree" subtree (the synthetic bofNode is
            // not part of that subtree).
            ILOG.J2CsMapping.Collections.ISet firstPositions = ruleCat.fRightChild.fFirstPosSet;

            foreach (RBBINode candidate in firstPositions)
            {
                // A leaf carrying the same value as the synthetic node is a {bof}
                // that was written explicitly into a rule.
                bool isExplicitBof = candidate.fType == IBM.ICU.Text.RBBINode.leafChar
                                     && candidate.fVal == bofNode.fVal;
                if (isExplicitBof)
                {
                    // Everything that may follow the explicit {bof} may also follow
                    // the synthetic one.
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(candidate.fFollowPos, bofNode.fFollowPos);
                }
            }
        }
Esempio n. 2
0
        // -----------------------------------------------------------------------------
        //
        // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
        //
        // -----------------------------------------------------------------------------
        /// <summary>
        /// Recursively fills in the followPos sets for the subtree rooted at
        /// <paramref name="n"/>, children first, then the node itself.
        /// </summary>
        /// <param name="n">Root of the subtree to process; null is ignored.</param>
        internal void CalcFollowPos(RBBINode n)
        {
            if (n == null)
            {
                return;
            }

            // Leaf characters and end markers have no children and contribute
            // nothing at this level.
            if (n.fType == IBM.ICU.Text.RBBINode.leafChar || n.fType == IBM.ICU.Text.RBBINode.endMark)
            {
                return;
            }

            // Post-order traversal: both subtrees are handled before this node.
            CalcFollowPos(n.fLeftChild);
            CalcFollowPos(n.fRightChild);

            if (n.fType == IBM.ICU.Text.RBBINode.opCat)
            {
                // Aho rule #1: for a concatenation, every position in lastPos of
                // the left child is followed by firstPos of the right child.
                foreach (RBBINode lastOfLeft in n.fLeftChild.fLastPosSet)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fRightChild.fFirstPosSet, lastOfLeft.fFollowPos);
                }
            }
            else if (n.fType == IBM.ICU.Text.RBBINode.opStar || n.fType == IBM.ICU.Text.RBBINode.opPlus)
            {
                // Aho rule #2: for a repetition, every position in lastPos of the
                // node is followed by the node's own firstPos.
                foreach (RBBINode lastOfNode in n.fLastPosSet)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fFirstPosSet, lastOfNode.fFollowPos);
                }
            }
        }
Esempio n. 3
0
        // -----------------------------------------------------------------------------
        //
        // calcChainedFollowPos. Modify the previously calculated followPos sets
        // to implement rule chaining. NOT described by Aho
        //
        // -----------------------------------------------------------------------------
        /// <summary>
        /// Adjusts the already computed followPos sets so that rules chain: a leaf
        /// that can end a match gains the followPos of every match-starting leaf
        /// that carries the same value.
        /// </summary>
        /// <param name="tree">Root of the parse tree whose followPos sets are updated.</param>
        internal void CalcChainedFollowPos(RBBINode tree)
        {
            // Collect every end-marker node, then every leaf node, of the tree.
            IList endMarkers = new ArrayList();
            IList leaves     = new ArrayList();
            tree.FindNodes(endMarkers, IBM.ICU.Text.RBBINode.endMark);
            tree.FindNodes(leaves, IBM.ICU.Text.RBBINode.leafChar);

            // The nodes that can start a match are the firstPos set of the part of
            // the tree holding the user-written rules. When {bof} was seen, that
            // part is the right child of the root's left child; otherwise it is
            // the whole tree.
            RBBINode userRuleRoot = fRB.fSetBuilder.SawBOF()
                ? tree.fLeftChild.fRightChild
                : tree;
            IBM.ICU.Impl.Assert.Assrt(userRuleRoot != null);
            ILOG.J2CsMapping.Collections.ISet matchStartNodes = userRuleRoot.fFirstPosSet;

            foreach (RBBINode leaf in leaves)
            {
                // A leaf can end an overall rule match when its followPos set
                // contains at least one end-marker node.
                bool endsMatch = false;
                foreach (RBBINode marker in endMarkers)
                {
                    if (ILOG.J2CsMapping.Collections.Collections.Contains(marker, leaf.fFollowPos))
                    {
                        endsMatch = true;
                        break;
                    }
                }
                if (!endsMatch)
                {
                    // Not an end node; move on to the next leaf.
                    continue;
                }

                // Line-break specific hack: do not chain from a node whose value
                // corresponds to the $CM character class.
                // TODO: Add rule syntax for this behavior and move the specifics
                // out of here and into the rule file.
                if (fRB.fLBCMNoChain)
                {
                    // A first char of -1 occurs with sets containing only the
                    // {eof} marker string; such sets are never skipped here.
                    int firstChar = this.fRB.fSetBuilder.GetFirstChar(leaf.fVal);
                    if (firstChar != -1
                        && IBM.ICU.Lang.UCharacter.GetIntPropertyValue(firstChar, IBM.ICU.Lang.UProperty_Constants.LINE_BREAK)
                           == IBM.ICU.Lang.UCharacter.LineBreak.COMBINING_MARK)
                    {
                        continue;
                    }
                }

                // Look through the nodes that can start a match for leaves with
                // the same value (character class) as this ending leaf.
                foreach (RBBINode starter in matchStartNodes)
                {
                    if (starter.fType != IBM.ICU.Text.RBBINode.leafChar || leaf.fVal != starter.fVal)
                    {
                        continue;
                    }

                    // The end of one possible match equals the start of another.
                    // Adding the starter's followPos to the ending leaf's followPos
                    // lets a match state at the ending leaf transition to the
                    // second char of a match that begins with the starter.
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(starter.fFollowPos, leaf.fFollowPos);
                }
            }
        }