// -----------------------------------------------------------------------------
//
// bofFixup. Fixup for state tables that include {bof} (beginning of input)
//           testing.
//           Performs a swizzle similar to chaining: the followPos set of the
//           bofNode is extended with the followPos nodes of the other {bof}
//           nodes scattered through the tree.
//
//           This function has much in common with calcChainedFollowPos().
//
// -----------------------------------------------------------------------------
internal void BofFixup()
{
    //
    // The parse tree looks like this ...
    //          fTree root --.       <cat>
    //                               /     \
    //                            <cat>   <#end node>
    //                           /     \
    //                     <bofNode>   rest
    //                                  of tree
    //
    // Entries are added to the followPos set of the <bofNode>.
    //
    RBBINode innerCat = fRB.fTreeRoots[fRootIx].fLeftChild;
    RBBINode fakeBof = innerCat.fLeftChild;
    IBM.ICU.Impl.Assert.Assrt(fakeBof.fType == IBM.ICU.Text.RBBINode.leafChar);
    IBM.ICU.Impl.Assert.Assrt(fakeBof.fVal == 2);

    // The candidates are the nodes that can start a match of the
    // user-written rules, i.e. the first positions of the part labeled
    // "rest of tree" (the fake bofNode itself is not part of that subtree).
    ILOG.J2CsMapping.Collections.ISet restFirstPos = innerCat.fRightChild.fFirstPosSet;

    for (IIterator candidates = new ILOG.J2CsMapping.Collections.IteratorAdapter(restFirstPos.GetEnumerator()); candidates.HasNext();)
    {
        RBBINode candidate = (RBBINode)candidates.Next();

        // Only a leaf carrying the same value as the fake bofNode is a {bof}
        // that was explicitly written into a rule.
        bool isExplicitBof = candidate.fType == IBM.ICU.Text.RBBINode.leafChar
                && candidate.fVal == fakeBof.fVal;
        if (isExplicitBof)
        {
            // Add everything from the followPos set of the explicit {bof}
            // node to the followPos set of the fake bofNode at the start of
            // the tree.
            ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(candidate.fFollowPos, fakeBof.fFollowPos);
        }
    }
}
// -----------------------------------------------------------------------------
//
// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
//
//           Walks the subtree rooted at n depth-first (children before the
//           node itself) and fills in the fFollowPos sets of the positions
//           reachable from it. Null nodes, leafChar nodes and endMark nodes
//           contribute nothing and are skipped.
//
// -----------------------------------------------------------------------------
internal void CalcFollowPos(RBBINode n)
{
    if (n == null)
    {
        return;
    }
    if (n.fType == IBM.ICU.Text.RBBINode.leafChar || n.fType == IBM.ICU.Text.RBBINode.endMark)
    {
        return;
    }

    // Children first, so the rules below run bottom-up.
    CalcFollowPos(n.fLeftChild);
    CalcFollowPos(n.fRightChild);

    // Aho rule #1: for a concatenation node, every position in the lastPos
    // set of the left child is followed by the firstPos set of the right
    // child.
    if (n.fType == IBM.ICU.Text.RBBINode.opCat)
    {
        ILOG.J2CsMapping.Collections.ISet leftLastPos = n.fLeftChild.fLastPosSet;
        for (IIterator lastIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(leftLastPos.GetEnumerator()); lastIt.HasNext();)
        {
            RBBINode position = (RBBINode)lastIt.Next(); // 'i' in Aho's description
            ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fRightChild.fFirstPosSet, position.fFollowPos);
        }
    }

    // Aho rule #2: for a star or plus node, every position in the node's own
    // lastPos set is followed by the node's own firstPos set.
    bool isRepetition = n.fType == IBM.ICU.Text.RBBINode.opStar
            || n.fType == IBM.ICU.Text.RBBINode.opPlus;
    if (isRepetition)
    {
        for (IIterator lastIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(n.fLastPosSet.GetEnumerator()); lastIt.HasNext();)
        {
            RBBINode position = (RBBINode)lastIt.Next(); // again 'i' in Aho's description
            ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fFirstPosSet, position.fFollowPos);
        }
    }
}
// -----------------------------------------------------------------------------
//
// calcChainedFollowPos. Modify the previously calculated followPos sets
//           to implement rule chaining. NOT described by Aho.
//
//           For every leaf that can end a match, the followPos sets of all
//           match-starting leaves with the same value (character class) are
//           merged into that leaf's followPos set.
//
// -----------------------------------------------------------------------------
internal void CalcChainedFollowPos(RBBINode tree)
{
    IList endMarkerNodes = new ArrayList();
    IList leafNodes = new ArrayList();

    // Collect every end-marker node, then every leaf node, of the tree.
    tree.FindNodes(endMarkerNodes, IBM.ICU.Text.RBBINode.endMark);
    tree.FindNodes(leafNodes, IBM.ICU.Text.RBBINode.leafChar);

    // The nodes that can start a match are FirstPosition() of the portion of
    // the tree corresponding to user-written rules. When {bof} was seen that
    // portion sits one level down; see the tree description in bofFixup().
    RBBINode userRuleRoot = fRB.fSetBuilder.SawBOF() ? tree.fLeftChild.fRightChild : tree;
    IBM.ICU.Impl.Assert.Assrt(userRuleRoot != null);
    ILOG.J2CsMapping.Collections.ISet matchStartNodes = userRuleRoot.fFirstPosSet;

    // Iterate over all leaf nodes.
    for (IIterator leafIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(leafNodes.GetEnumerator()); leafIt.HasNext();)
    {
        RBBINode leaf = (RBBINode)leafIt.Next();

        // A leaf corresponds to an overall rule match position when its
        // followPos set includes an end-marker node. Stop at the first hit.
        bool endsMatch = false;
        for (IIterator markerIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(endMarkerNodes.GetEnumerator()); !endsMatch && markerIt.HasNext();)
        {
            RBBINode marker = (RBBINode)markerIt.Next();
            endsMatch = ILOG.J2CsMapping.Collections.Collections.Contains(marker, leaf.fFollowPos);
        }
        if (!endsMatch)
        {
            // Not an end node; move on to the next leaf.
            continue;
        }

        // This leaf can end a match.
        // Line Break specific hack: if this leaf's val corresponds to the
        // $CM char class, don't chain from it.
        // TODO: Add rule syntax for this behavior, get specifics out of here
        //       and into the rule file.
        if (fRB.fLBCMNoChain)
        {
            // firstChar == -1 occurs with sets containing only the {eof}
            // marker string; those are never treated as combining marks.
            int firstChar = this.fRB.fSetBuilder.GetFirstChar(leaf.fVal);
            if (firstChar != -1
                    && IBM.ICU.Lang.UCharacter.GetIntPropertyValue(firstChar, IBM.ICU.Lang.UProperty_Constants.LINE_BREAK) == IBM.ICU.Lang.UCharacter.LineBreak.COMBINING_MARK)
            {
                continue;
            }
        }

        // Now look through the nodes that can start a match for ones with
        // the same char class as the ending leaf.
        for (IIterator startIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(matchStartNodes.GetEnumerator()); startIt.HasNext();)
        {
            RBBINode starter = (RBBINode)startIt.Next();
            bool sameClassLeaf = starter.fType == IBM.ICU.Text.RBBINode.leafChar
                    && leaf.fVal == starter.fVal;
            if (sameClassLeaf)
            {
                // The end val (character class) of one possible match is the
                // same as the start of another. Adding the followPos of the
                // start node to the followPos of the end node lets matches
                // transition from a match state at the end node to the
                // second char of a match starting with the start node.
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(starter.fFollowPos, leaf.fFollowPos);
            }
        }
    }
}