示例#1
0
        // -------------------------------------------------------------------------
        //
        // print. Print out a single node, for debugging.
        //
        // -------------------------------------------------------------------------
        // /CLOVER:OFF
        static internal void PrintNode(RBBINode n)
        {
            if (n != null)
            {
                // Parent and child links are shown by serial number; 0 is printed
                // when the corresponding link is null.
                int parentSerial = (n.fParent == null) ? 0 : n.fParent.fSerialNum;
                int leftSerial   = (n.fLeftChild == null) ? 0 : n.fLeftChild.fSerialNum;
                int rightSerial  = (n.fRightChild == null) ? 0 : n.fRightChild.fSerialNum;

                RBBINode.PrintInt(n.fSerialNum, 10);
                RBBINode.PrintString(nodeTypeNames[n.fType], 11);
                RBBINode.PrintInt(parentSerial, 11);
                RBBINode.PrintInt(leftSerial, 11);
                RBBINode.PrintInt(rightSerial, 12);
                RBBINode.PrintInt(n.fFirstPos, 12);
                RBBINode.PrintInt(n.fVal, 7);

                // Only variable references get their text appended.
                if (n.fType == varRef)
                {
                    System.Console.Out.Write(" " + n.fText);
                }
            }
            else
            {
                System.Console.Out.Write(" -- null --\n");
            }

            // The output line is terminated in both cases.
            System.Console.Out.WriteLine("");
        }
示例#2
0
        // ------------------------------------------------------------------------
        //
        // printRanges A debugging function.
        // dump out all of the range definitions.
        //
        // ------------------------------------------------------------------------
        // /CLOVER:OFF
        internal void PrintRanges()
        {
            System.Console.Out.Write("\n\n Nonoverlapping Ranges ...\n");

            // Walk the linked list of range descriptors starting at fRangeList.
            RBBISetBuilder.RangeDescriptor range = fRangeList;
            while (range != null)
            {
                // One output line per range: its number, then start-end printed as integers.
                System.Console.Out.Write(" " + range.fNum + "   "
                                         + (int)range.fStartChar + "-" + (int)range.fEndChar);

                for (int setIx = 0; setIx < range.fIncludesSets.Count; setIx++)
                {
                    RBBINode usetNode = (RBBINode)range.fIncludesSets[setIx];

                    // A set is labelled with the text of its grandparent node when that
                    // node is a variable reference; otherwise it is reported as "anon".
                    String   label       = "anon";
                    RBBINode parent      = usetNode.fParent;
                    RBBINode grandParent = (parent == null) ? null : parent.fParent;
                    if (grandParent != null && grandParent.fType == IBM.ICU.Text.RBBINode.varRef)
                    {
                        label = grandParent.fText;
                    }

                    System.Console.Out.Write(label);
                    System.Console.Out.Write("  ");
                }

                System.Console.Out.WriteLine("");
                range = range.fNext;
            }
        }
示例#3
0
        // -------------------------------------------------------------------------
        //
        // cloneTree Make a copy of the subtree rooted at this node.
        // Discard any variable references encountered along the way,
        // and replace with copies of the variable's definitions.
        // Used to replicate the expression underneath variable
        // references in preparation for generating the DFA tables.
        //
        // -------------------------------------------------------------------------
        internal RBBINode CloneTree()
        {
            // A variable reference is never copied itself; the clone of its
            // left child (the variable's definition) is returned in its place.
            if (fType == RBBINode.varRef)
            {
                return fLeftChild.CloneTree();
            }

            // uset nodes are returned as-is rather than duplicated.
            if (fType == RBBINode.uset)
            {
                return this;
            }

            // Any other node: copy it, then recursively copy whichever children
            // exist and point them back at the new copy.
            RBBINode copy = new RBBINode(this);
            if (fLeftChild != null)
            {
                RBBINode leftCopy = fLeftChild.CloneTree();
                copy.fLeftChild  = leftCopy;
                leftCopy.fParent = copy;
            }
            if (fRightChild != null)
            {
                RBBINode rightCopy = fRightChild.CloneTree();
                copy.fRightChild  = rightCopy;
                rightCopy.fParent = copy;
            }
            return copy;
        }
示例#4
0
	    // /CLOVER:ON
	
	    // ---------------------------------------------------------------------------------
	    //
	    // pushNewNode create a new RBBINode of the specified type and push it
	    // onto the stack of nodes.
	    //
	    // ---------------------------------------------------------------------------------
	    internal RBBINode PushNewNode(int nodeType) {
	        // The stack pointer is advanced first and then checked against kStackSize.
	        ++fNodeStackPtr;
	        if (fNodeStackPtr >= kStackSize) {
	            System.Console.Out.WriteLine("RBBIRuleScanner.pushNewNode - stack overflow.");
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
	        }

	        // The freshly created node becomes the new top of the node stack
	        // and is also handed back to the caller.
	        RBBINode pushed = new RBBINode(nodeType);
	        fNodeStack[fNodeStackPtr] = pushed;
	        return pushed;
	    }
示例#5
0
        // ------------------------------------------------------------------------
        //
        // addValToSets Add a runtime-mapped input value to each uset from a
        // list of uset nodes. (val corresponds to a state table column.)
        // For each of the original Unicode sets - which correspond
        // directly to uset nodes - a logically equivalent expression
        // is constructed in terms of the remapped runtime input
        // symbol set. This function adds one runtime input symbol to
        // a list of sets.
        //
        // The "logically equivalent expression" is the tree for an
        // or-ing together of all of the symbols that go into the set.
        //
        // ------------------------------------------------------------------------
        internal void AddValToSets(IList sets, int val)
        {
            // Every element of the list is treated as a uset node and receives
            // the same value through AddValToSet.
            foreach (RBBINode usetNode in sets)
            {
                AddValToSet(usetNode, val);
            }
        }
        // -----------------------------------------------------------------------------
        //
        // printSet Debug function. Print the contents of a set of Nodes
        //
        // -----------------------------------------------------------------------------

        internal void PrintSet(ICollection s)
        {
            // Write the serial number of each node in the collection, then end the line.
            foreach (RBBINode node in s)
            {
                IBM.ICU.Text.RBBINode.PrintInt(node.fSerialNum, 8);
            }
            System.Console.Out.WriteLine();
        }
示例#7
0
	    // ----------------------------------------------------------------------------------------
	    //
	    // findSetFor given a String,
	    // - find the corresponding Unicode Set (uset node)
	    // (create one if necessary)
	    // - Set fLeftChild of the caller's node (should be a setRef node)
	    // to the uset node
	    // Maintain a hash table of uset nodes, so the same one is always used
	    // for the same string.
	    // If a "to adopt" set is provided and we haven't seen this key before,
	    // add the provided set to the hash table.
	    // If the string is one (32 bit) char in length, the set contains
	    // just one element which is the char in question.
	    // If the string is "any", return a set containing all chars.
	    //
	    // ----------------------------------------------------------------------------------------
	    internal void FindSetFor(String s, RBBINode node, UnicodeSet setToAdopt) {
	        // If a uset node is already cached under this string, attach it to the
	        // caller's node and stop; any set passed in by the caller is not used.
	        RBBIRuleScanner.RBBISetTableEl cached =
	                (RBBIRuleScanner.RBBISetTableEl) ILOG.J2CsMapping.Collections.Collections.Get(fSetTable,s);
	        if (cached != null) {
	            node.fLeftChild = cached.val;
	            IBM.ICU.Impl.Assert.Assrt(node.fLeftChild.fType == IBM.ICU.Text.RBBINode.uset);
	            return;
	        }

	        // First time this string is seen. When the caller supplied no set, build one:
	        // the full range 0..0x10ffff for kAny, otherwise just the code point
	        // found at index 0 of the string.
	        UnicodeSet inputSet = setToAdopt;
	        if (inputSet == null) {
	            if (s.Equals(kAny)) {
	                inputSet = new UnicodeSet(0x000000, 0x10ffff);
	            } else {
	                int codePoint = IBM.ICU.Text.UTF16.CharAt(s, 0);
	                inputSet = new UnicodeSet(codePoint, codePoint);
	            }
	        }

	        // Wrap the set in a new uset node and hang it under the caller's node.
	        RBBINode created = new RBBINode(IBM.ICU.Text.RBBINode.uset);
	        created.fInputSet = inputSet;
	        created.fParent = node;
	        node.fLeftChild = created;
	        created.fText = s;

	        // Record the node in the builder's list of all uset nodes.
	        ILOG.J2CsMapping.Collections.Generics.Collections.Add(fRB.fUSetNodes,created);

	        // Cache it under the string so later lookups reuse the same node.
	        RBBIRuleScanner.RBBISetTableEl entry = new RBBIRuleScanner.RBBISetTableEl ();
	        entry.key = s;
	        entry.val = created;
	        ILOG.J2CsMapping.Collections.Collections.Put(fSetTable,entry.key,entry);
	    }
示例#8
0
        //
        // RBBISymbolTable::lookupNode Given a key (a variable name), return the
        // corresponding RBBI Node. If there is no entry
        // in the table for this name, return NULL.
        //
        internal RBBINode LookupNode(String key_0)
        {
            RBBISymbolTable.RBBISymbolTableEntry entry =
                (RBBISymbolTable.RBBISymbolTableEntry)ILOG.J2CsMapping.Collections.Collections.Get(fHashTable, key_0);

            // A name with no table entry yields null.
            return (entry == null) ? null : entry.val;
        }
示例#9
0
        //
        // RBBISymbolTable::addEntry Add a new entry to the symbol table.
        // Indicate an error if the name already exists -
        // this will only occur in the case of duplicate
        // variable assignments.
        //
        internal void AddEntry(String key_0, RBBINode val_1)
        {
            // A name that is already in the table is reported through the rule
            // scanner's Error(); the existing entry is left as it is.
            RBBISymbolTable.RBBISymbolTableEntry existing =
                (RBBISymbolTable.RBBISymbolTableEntry)ILOG.J2CsMapping.Collections.Collections.Get(fHashTable, key_0);
            if (existing != null)
            {
                fRuleScanner.Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_VARIABLE_REDFINITION);
                return;
            }

            // Otherwise store a new entry under the given name.
            RBBISymbolTable.RBBISymbolTableEntry added = new RBBISymbolTable.RBBISymbolTableEntry();
            added.key = key_0;
            added.val = val_1;
            ILOG.J2CsMapping.Collections.Collections.Put(fHashTable, added.key, added);
        }
示例#10
0
 /// <summary>
 /// Copy constructor. The new node takes a new serial number, copies the
 /// scalar fields, text and input set reference of <paramref name="other"/>, and
 /// gets its own HashedSet instances built from the other node's position sets.
 /// fParent, fLeftChild and fRightChild are not assigned here.
 /// </summary>
 internal RBBINode(RBBINode other)
 {
     // Initial precedence; replaced by the source node's value further down.
     this.fPrecedence = precZero;

     // Each copy consumes the next serial number.
     this.fSerialNum = ++gLastSerial;

     // Values taken over from the source node unchanged.
     this.fType       = other.fType;
     this.fInputSet   = other.fInputSet;
     this.fPrecedence = other.fPrecedence;
     this.fText       = other.fText;
     this.fFirstPos   = other.fFirstPos;
     this.fLastPos    = other.fLastPos;
     this.fNullable   = other.fNullable;
     this.fVal        = other.fVal;

     // Position sets are new set objects constructed from the source node's sets.
     this.fFirstPosSet = new HashedSet(other.fFirstPosSet);
     this.fLastPosSet  = new HashedSet(other.fLastPosSet);
     this.fFollowPos   = new HashedSet(other.fFollowPos);
 }
示例#11
0
        // -----------------------------------------------------------------------------
        //
        // bofFixup. Fixup for state tables that include {bof} beginning of input
        // testing.
        // Do a swizzle similar to chaining, modifying the followPos set of
        // the bofNode to include the followPos nodes from other {bof} nodes
        // scattered through the tree.
        //
        // This function has much in common with calcChainedFollowPos().
        //
        // -----------------------------------------------------------------------------
        internal void BofFixup()
        {
            //
            // Expected shape of the parse tree:
            //
            //   fTree root ---> <cat>
            //                   /   \
            //                <cat>   <#end node>
            //                /   \
            //         <bofNode>   rest of tree
            //
            // The followPos set of <bofNode> is the one that gets extended here.
            //
            RBBINode topCat  = fRB.fTreeRoots[fRootIx].fLeftChild;
            RBBINode bofNode = topCat.fLeftChild;

            IBM.ICU.Impl.Assert.Assrt(bofNode.fType == IBM.ICU.Text.RBBINode.leafChar);
            IBM.ICU.Impl.Assert.Assrt(bofNode.fVal == 2);

            // Candidate nodes are the firstPos set of the "rest of tree" part,
            // i.e. the nodes that can start a match of the user-written rules,
            // not counting the fake bofNode.
            ILOG.J2CsMapping.Collections.ISet matchStartNodes = topCat.fRightChild.fFirstPosSet;

            foreach (RBBINode startNode in matchStartNodes)
            {
                // A leaf char carrying the same value as bofNode is a {bof} that was
                // explicitly written into a rule. Everything in its followPos set is
                // added to the followPos set of the bofNode at the start of the tree.
                bool isExplicitBof = startNode.fType == IBM.ICU.Text.RBBINode.leafChar
                                     && startNode.fVal == bofNode.fVal;
                if (isExplicitBof)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(startNode.fFollowPos, bofNode.fFollowPos);
                }
            }
        }
示例#12
0
        // -----------------------------------------------------------------------------
        //
        // calcNullable. Impossible to explain succinctly. See Aho, section 3.9
        //
        // -----------------------------------------------------------------------------
        internal void CalcNullable(RBBINode n)
        {
            if (n == null)
            {
                return;
            }

            int type = n.fType;

            // setRef and endMark are handled as leaves that are never nullable.
            if (type == IBM.ICU.Text.RBBINode.setRef || type == IBM.ICU.Text.RBBINode.endMark)
            {
                n.fNullable = false;
                return;
            }

            // lookAhead and tag are handled as leaves too, and are always nullable
            // (they match no literal text from the input).
            if (type == IBM.ICU.Text.RBBINode.lookAhead || type == IBM.ICU.Text.RBBINode.tag)
            {
                n.fNullable = true;
                return;
            }

            // Everything else: children first, then combine their results.
            CalcNullable(n.fLeftChild);
            CalcNullable(n.fRightChild);

            bool nullable;
            if (type == IBM.ICU.Text.RBBINode.opOr)
            {
                // Alternation: nullable when either side is.
                nullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
            }
            else if (type == IBM.ICU.Text.RBBINode.opCat)
            {
                // Concatenation: nullable only when both sides are.
                nullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
            }
            else if (type == IBM.ICU.Text.RBBINode.opStar || type == IBM.ICU.Text.RBBINode.opQuestion)
            {
                nullable = true;
            }
            else
            {
                nullable = false;
            }
            n.fNullable = nullable;
        }
示例#13
0
        // -------------------------------------------------------------------------
        //
        // flattenVariables Walk a parse tree, replacing any variable
        // references with a copy of the variable's definition.
        // Aside from variables, the tree is not changed.
        //
        // Return the root of the tree. If the root was not a variable
        // reference, it remains unchanged - the root we started with
        // is the root we return. If, however, the root was a variable
        // reference, the root of the newly cloned replacement tree will
        // be returned, and the original tree deleted.
        //
        // This function works by recursively walking the tree
        // without doing anything until a variable reference is
        // found, then calling cloneTree() at that point. Any
        // nested references are handled by cloneTree(), not here.
        //
        // -------------------------------------------------------------------------
        internal RBBINode FlattenVariables()
        {
            // A variable reference is replaced by a clone of its left child;
            // the caller receives the root of that clone.
            if (fType == varRef)
            {
                return fLeftChild.CloneTree();
            }

            // Otherwise flatten each existing child in place and re-attach
            // whatever comes back, since the child itself may have been replaced.
            if (fLeftChild != null)
            {
                RBBINode flatLeft = fLeftChild.FlattenVariables();
                fLeftChild       = flatLeft;
                flatLeft.fParent = this;
            }
            if (fRightChild != null)
            {
                RBBINode flatRight = fRightChild.FlattenVariables();
                fRightChild       = flatRight;
                flatRight.fParent = this;
            }
            return this;
        }
示例#14
0
        // -----------------------------------------------------------------------------
        //
        // printPosSets Debug function. Dump Nullable, firstpos, lastpos and
        // followpos
        // for each node in the tree.
        //
        // -----------------------------------------------------------------------------

        internal void PrintPosSets(RBBINode n)
        {
            if (n != null)
            {
                // This node: the node line, its nullable flag, then its three sets.
                IBM.ICU.Text.RBBINode.PrintNode(n);
                System.Console.Out.Write("         Nullable:  " + n.fNullable);

                System.Console.Out.Write("         firstpos:  ");
                PrintSet(n.fFirstPosSet);

                System.Console.Out.Write("         lastpos:   ");
                PrintSet(n.fLastPosSet);

                System.Console.Out.Write("         followpos: ");
                PrintSet(n.fFollowPos);

                // Then the same for the left subtree followed by the right subtree.
                PrintPosSets(n.fLeftChild);
                PrintPosSets(n.fRightChild);
            }
        }
示例#15
0
        // -----------------------------------------------------------------------------
        //
        // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
        //
        // -----------------------------------------------------------------------------
        internal void CalcLastPos(RBBINode n)
        {
            if (n == null)
            {
                return;
            }

            int  type   = n.fType;
            bool isLeaf = type == IBM.ICU.Text.RBBINode.leafChar || type == IBM.ICU.Text.RBBINode.endMark
                          || type == IBM.ICU.Text.RBBINode.lookAhead || type == IBM.ICU.Text.RBBINode.tag;
            if (isLeaf)
            {
                // The lastPos set of one of these leaf types holds the leaf itself.
                ILOG.J2CsMapping.Collections.Generics.Collections.Add(n.fLastPosSet, n);
                return;
            }

            // Not one of the leaf types above: compute the children first.
            CalcLastPos(n.fLeftChild);
            CalcLastPos(n.fRightChild);

            RBBINode left  = n.fLeftChild;
            RBBINode right = n.fRightChild;

            if (type == IBM.ICU.Text.RBBINode.opOr)
            {
                // Alternation: both children contribute.
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(left.fLastPosSet, n.fLastPosSet);
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(right.fLastPosSet, n.fLastPosSet);
            }
            else if (type == IBM.ICU.Text.RBBINode.opCat)
            {
                // Concatenation: the right child always contributes; the left one
                // only when the right child is nullable.
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(right.fLastPosSet, n.fLastPosSet);
                if (right.fNullable)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(left.fLastPosSet, n.fLastPosSet);
                }
            }
            else if (type == IBM.ICU.Text.RBBINode.opStar || type == IBM.ICU.Text.RBBINode.opQuestion
                     || type == IBM.ICU.Text.RBBINode.opPlus)
            {
                // Star, question and plus: only the left child's set is used.
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(left.fLastPosSet, n.fLastPosSet);
            }
        }
示例#16
0
        // -----------------------------------------------------------------------------
        //
        // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
        //
        // -----------------------------------------------------------------------------
        internal void CalcFollowPos(RBBINode n)
        {
            if (n == null)
            {
                return;
            }

            // leafChar and endMark nodes are not processed and not descended into.
            if (n.fType == IBM.ICU.Text.RBBINode.leafChar || n.fType == IBM.ICU.Text.RBBINode.endMark)
            {
                return;
            }

            CalcFollowPos(n.fLeftChild);
            CalcFollowPos(n.fRightChild);

            // Rule 1 (concatenation): every node in lastPos of the left child
            // gains firstPos of the right child in its followPos set.
            if (n.fType == IBM.ICU.Text.RBBINode.opCat)
            {
                ILOG.J2CsMapping.Collections.ISet leftLastPos = n.fLeftChild.fLastPosSet;
                foreach (RBBINode lastNode in leftLastPos)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fRightChild.fFirstPosSet, lastNode.fFollowPos);
                }
            }

            // Rule 2 (star / plus): every node in lastPos of this node gains
            // firstPos of this node in its followPos set.
            if (n.fType == IBM.ICU.Text.RBBINode.opStar || n.fType == IBM.ICU.Text.RBBINode.opPlus)
            {
                foreach (RBBINode lastNode in n.fLastPosSet)
                {
                    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(n.fFirstPosSet, lastNode.fFollowPos);
                }
            }
        }
示例#17
0
        // -------------------------------------------------------------------------
        //
        // flattenSets Walk the parse tree, replacing any nodes of type setRef
        // with a copy of the expression tree for the set. A set's
        // equivalent expression tree is precomputed and saved as
        // the left child of the uset node.
        //
        // -------------------------------------------------------------------------
        internal void FlattenSets()
        {
            // This method is not meant to be invoked on a setRef node itself.
            IBM.ICU.Impl.Assert.Assrt(fType != setRef);

            if (fLeftChild != null)
            {
                if (fLeftChild.fType != setRef)
                {
                    fLeftChild.FlattenSets();
                }
                else
                {
                    // setRef -> uset -> replacement tree: the uset node's left child is
                    // cloned and takes the place of the setRef child.
                    RBBINode leftReplacement = fLeftChild.fLeftChild.fLeftChild;
                    fLeftChild         = leftReplacement.CloneTree();
                    fLeftChild.fParent = this;
                }
            }

            if (fRightChild != null)
            {
                if (fRightChild.fType != setRef)
                {
                    fRightChild.FlattenSets();
                }
                else
                {
                    // Same substitution on the right-hand side.
                    RBBINode rightReplacement = fRightChild.fLeftChild.fLeftChild;
                    fRightChild         = rightReplacement.CloneTree();
                    fRightChild.fParent = this;
                }
            }
        }
示例#18
0
            // -------------------------------------------------------------------------------------
            //
            // RangeDescriptor::setDictionaryFlag
            //
            // Character Category Numbers that include characters from
            // the original Unicode Set named "dictionary" have bit 14
            // set to 1. The RBBI runtime engine uses this to trigger
            // use of the word dictionary.
            //
            // This function looks through the Unicode Sets that it
            // (the range) includes, and sets the bit in fNum when
            // "dictionary" is among them.
            //
            // TODO: a faster way would be to find the set node for
            // "dictionary" just once, rather than looking it
            // up by name every time.
            //
            // -------------------------------------------------------------------------------------
            internal void SetDictionaryFlag()
            {
                foreach (RBBINode usetNode in this.fIncludesSets)
                {
                    // The name of a set is the text of its grandparent node, and only
                    // when that grandparent is a variable reference.
                    RBBINode parent      = usetNode.fParent;
                    RBBINode grandParent = (parent == null) ? null : parent.fParent;

                    if (grandParent != null
                        && grandParent.fType == IBM.ICU.Text.RBBINode.varRef
                        && grandParent.fText.Equals("dictionary"))
                    {
                        // Set bit 14 of the range's number; one match is enough.
                        this.fNum |= 0x4000;
                        return;
                    }
                }
            }
示例#19
0
        /// <summary>
        /// Adds a leafChar node carrying <paramref name="val"/> beneath
        /// <paramref name="usetNode"/>. When the uset node already has a left
        /// child, that child and the new leaf are joined under a new opOr node.
        /// </summary>
        internal void AddValToSet(RBBINode usetNode, int val)
        {
            RBBINode leaf = new RBBINode(IBM.ICU.Text.RBBINode.leafChar);
            leaf.fVal = val;

            RBBINode existing = usetNode.fLeftChild;
            if (existing == null)
            {
                // First value for this set: the leaf becomes the left child directly.
                usetNode.fLeftChild = leaf;
                leaf.fParent        = usetNode;
                return;
            }

            // Values are already present: the earlier subtree goes on the left of a
            // new OR node, the new leaf on the right, and the OR node replaces the
            // earlier subtree as the uset node's left child.
            RBBINode orNode = new RBBINode(IBM.ICU.Text.RBBINode.opOr);
            orNode.fLeftChild   = existing;
            orNode.fRightChild  = leaf;
            existing.fParent    = orNode;
            leaf.fParent        = orNode;
            usetNode.fLeftChild = orNode;
            orNode.fParent      = usetNode;
        }
示例#20
0
	    // ---------------------------------------------------------------------------------
	    //
	    // Parse RBBI rules. The state machine for rules parsing is here.
	    // The state tables are hand-written in the file rbbirpt.txt,
	    // and converted to the form used here by a perl
	    // script rbbicst.pl
	    //
	    // ---------------------------------------------------------------------------------
	    internal void Parse() {
	        int state;
	        RBBIRuleParseTable.RBBIRuleTableElement tableEl;
	
	        state = 1;
	        NextChar(fC);
	        //
	        // Main loop for the rule parsing state machine.
	        // Runs once per state transition.
	        // Each time through optionally performs, depending on the state table,
	        // - an advance to the the next input char
	        // - an action to be performed.
	        // - pushing or popping a state to/from the local state return stack.
	        //
	        for (;;) {
	            // Quit if state == 0. This is the normal way to exit the state
	            // machine.
	            //
	            if (state == 0) {
	                break;
	            }
	
	            // Find the state table element that matches the input char from the
	            // rule, or the
	            // class of the input character. Start with the first table row for
	            // this
	            // state, then linearly scan forward until we find a row that
	            // matches the
	            // character. The last row for each state always matches all
	            // characters, so
	            // the search will stop there, if not before.
	            //
	            tableEl = IBM.ICU.Text.RBBIRuleParseTable.gRuleParseStateTable[state];
	            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("scan") >= 0) {
	                System.Console.Out.WriteLine("char, line, col = (\'" + (char) fC.fChar
	                        + "\', " + fLineNum + ", " + fCharNum + "    state = "
	                        + tableEl.fStateName);
	            }
	
	            for (int tableRow = state;; tableRow++) { // loop over the state
	                                                      // table rows associated
	                                                      // with this state.
	                tableEl = IBM.ICU.Text.RBBIRuleParseTable.gRuleParseStateTable[tableRow];
	                if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("scan") >= 0) {
	                    System.Console.Out.Write(".");
	                }
	                if (tableEl.fCharClass < 127 && fC.fEscaped == false
	                        && tableEl.fCharClass == fC.fChar) {
	                    // Table row specified an individual character, not a set,
	                    // and
	                    // the input character is not escaped, and
	                    // the input character matched it.
	                    break;
	                }
	                if (tableEl.fCharClass == 255) {
	                    // Table row specified default, match anything character
	                    // class.
	                    break;
	                }
	                if (tableEl.fCharClass == 254 && fC.fEscaped) {
	                    // Table row specified "escaped" and the char was escaped.
	                    break;
	                }
	                if (tableEl.fCharClass == 253 && fC.fEscaped
	                        && (fC.fChar == 0x50 || fC.fChar == 0x70)) {
	                    // Table row specified "escaped P" and the char is either
	                    // 'p' or 'P'.
	                    break;
	                }
	                if (tableEl.fCharClass == 252 && fC.fChar == (int) -1) {
	                    // Table row specified eof and we hit eof on the input.
	                    break;
	                }
	
	                if (tableEl.fCharClass >= 128 && tableEl.fCharClass < 240 && // Table
	                                                                             // specs
	                                                                             // a
	                                                                             // char
	                                                                             // class
	                                                                             // &&
	                        fC.fEscaped == false && // char is not escaped &&
	                        fC.fChar != (int) -1) { // char is not EOF
	                    UnicodeSet uniset = fRuleSets[tableEl.fCharClass - 128];
	                    if (uniset.Contains(fC.fChar)) {
	                        // Table row specified a character class, or set of
	                        // characters,
	                        // and the current char matches it.
	                        break;
	                    }
	                }
	            }
	
	            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("scan") >= 0) {
	                System.Console.Out.WriteLine("");
	            }
	            //
	            // We've found the row of the state table that matches the current
	            // input
	            // character from the rules string.
	            // Perform any action specified by this row in the state table.
	            if (DoParseActions(tableEl.fAction) == false) {
	                // Break out of the state machine loop if the
	                // the action signalled some kind of error, or
	                // the action was to exit, occurs on normal end-of-rules-input.
	                break;
	            }
	
	            if (tableEl.fPushState != 0) {
	                fStackPtr++;
	                if (fStackPtr >= kStackSize) {
	                    System.Console.Out
	                            .WriteLine("RBBIRuleScanner.parse() - state stack overflow.");
	                    Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
	                }
	                fStack[fStackPtr] = tableEl.fPushState;
	            }
	
	            if (tableEl.fNextChar) {
	                NextChar(fC);
	            }
	
	            // Get the next state from the table entry, or from the
	            // state stack if the next state was specified as "pop".
	            if (tableEl.fNextState != 255) {
	                state = tableEl.fNextState;
	            } else {
	                state = fStack[fStackPtr];
	                fStackPtr--;
	                if (fStackPtr < 0) {
	                    System.Console.Out
	                            .WriteLine("RBBIRuleScanner.parse() - state stack underflow.");
	                    Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
	                }
	            }
	
	        }
	
	        //
	        // If there were NO user specified reverse rules, set up the equivalent
	        // of ".*;"
	        //
	        if (fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fReverseTree] == null) {
	            fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fReverseTree] = PushNewNode(IBM.ICU.Text.RBBINode.opStar);
	            RBBINode operand = PushNewNode(IBM.ICU.Text.RBBINode.setRef);
	            FindSetFor(kAny, operand, null);
	            fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fReverseTree].fLeftChild = operand;
	            operand.fParent = fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fReverseTree];
	            fNodeStackPtr -= 2;
	        }
	
	        //
	        // Parsing of the input RBBI rules is complete.
	        // We now have a parse tree for the rule expressions
	        // and a list of all UnicodeSets that are referenced.
	        //
	        if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("symbols") >= 0) {
	            fSymbolTable.RbbiSymtablePrint();
	        }
	        if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("ptree") >= 0) {
	            System.Console.Out.WriteLine("Completed Forward Rules Parse Tree...");
	            fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fForwardTree].PrintTree(true);
	            System.Console.Out.WriteLine("\nCompleted Reverse Rules Parse Tree...");
	            fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fReverseTree].PrintTree(true);
	            System.Console.Out
	                    .WriteLine("\nCompleted Safe Point Forward Rules Parse Tree...");
	            if (fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fSafeFwdTree] == null) {
	                System.Console.Out.WriteLine("  -- null -- ");
	            } else {
	                fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fSafeFwdTree].PrintTree(true);
	            }
	            System.Console.Out
	                    .WriteLine("\nCompleted Safe Point Reverse Rules Parse Tree...");
	            if (fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fSafeRevTree] == null) {
	                System.Console.Out.WriteLine("  -- null -- ");
	            } else {
	                fRB.fTreeRoots[IBM.ICU.Text.RBBIRuleBuilder.fSafeRevTree].PrintTree(true);
	            }
	        }
	    }
示例#21
0
        // -----------------------------------------------------------------------------
        //
        // RBBITableBuilder::build - This is the main function for building the DFA
        // state transition
        // table from the RBBI rules parse tree.
        //
        // -----------------------------------------------------------------------------
        internal void Build()
        {
            // Builds the state table for the rule tree selected by fRootIx:
            // the tree is flattened, given a {bof} prefix (when needed) and an
            // end marker, annotated with its position sets, and finally turned
            // into DFA states.

            // An absent tree is not an error (it easily happens for the
            // reverse rules); there is simply nothing to build.
            if (fRB.fTreeRoots[fRootIx] == null)
            {
                return;
            }

            // Swap every $variable reference for a copy of the parse tree of
            // its substitution expression.
            RBBINode flattened = fRB.fTreeRoots[fRootIx].FlattenVariables();
            fRB.fTreeRoots[fRootIx] = flattened;
            bool dumpFlattenedTree = fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("ftree") >= 0;
            if (dumpFlattenedTree)
            {
                System.Console.Out.WriteLine("Parse tree after flattening variable references.");
                fRB.fTreeRoots[fRootIx].PrintTree(true);
            }

            // When the rules mention {bof}, the tree becomes
            //     {bof} <cat> <former root of tree>
            // so that every match has to begin with the fake {bof} character.
            if (fRB.fSetBuilder.SawBOF())
            {
                RBBINode bofCat  = new RBBINode(IBM.ICU.Text.RBBINode.opCat);
                RBBINode bofChar = new RBBINode(IBM.ICU.Text.RBBINode.leafChar);
                bofChar.fVal       = 2; // Reserved value for {bof}.
                bofChar.fParent    = bofCat;
                bofCat.fLeftChild  = bofChar;
                bofCat.fRightChild = fRB.fTreeRoots[fRootIx];
                fRB.fTreeRoots[fRootIx] = bofCat;
            }

            // Hang the expression under a new cat-node whose left child is the
            // tree built so far and whose right child is a unique end marker.
            RBBINode endCat     = new RBBINode(IBM.ICU.Text.RBBINode.opCat);
            RBBINode formerRoot = fRB.fTreeRoots[fRootIx];
            endCat.fLeftChild   = formerRoot;
            formerRoot.fParent  = endCat;
            RBBINode endMarker  = new RBBINode(IBM.ICU.Text.RBBINode.endMark);
            endCat.fRightChild  = endMarker;
            endMarker.fParent   = endCat;
            fRB.fTreeRoots[fRootIx] = endCat;

            // Expand every UnicodeSet reference into the tree for the
            // equivalent expression.
            endCat.FlattenSets();
            bool dumpSetTree = fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("stree") >= 0;
            if (dumpSetTree)
            {
                System.Console.Out.WriteLine("Parse tree after flattening Unicode Set references.");
                fRB.fTreeRoots[fRootIx].PrintTree(true);
            }

            // Compute nullable, firstpos, lastpos and followpos for the nodes
            // of the parse tree. See the algorithm description in Aho; the code
            // alone is nearly impossible to follow without it.
            RBBINode root = fRB.fTreeRoots[fRootIx];
            CalcNullable(root);
            CalcFirstPos(root);
            CalcLastPos(root);
            CalcFollowPos(root);
            bool dumpPositions = fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("pos") >= 0;
            if (dumpPositions)
            {
                System.Console.Out.Write("\n");
                PrintPosSets(fRB.fTreeRoots[fRootIx]);
            }

            // "Chained" rules need their followPos sets adjusted.
            if (fRB.fChainRules)
            {
                CalcChainedFollowPos(fRB.fTreeRoots[fRootIx]);
            }

            // BOF (start of input) test fixup.
            if (fRB.fSetBuilder.SawBOF())
            {
                BofFixup();
            }

            // Build the DFA state transition tables and flag the special states.
            BuildStateTable();
            FlagAcceptingStates();
            FlagLookAheadStates();
            FlagTaggedStates();

            // The rule builder keeps one global vector of rule status {tag}
            // values shared by all tables; merge this table's values into it.
            MergeRuleStatusVals();

            bool dumpStates = fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("states") >= 0;
            if (dumpStates)
            {
                PrintStates();
            }
        }
示例#22
0
        // -----------------------------------------------------------------------------
        //
        // calcChainedFollowPos. Modify the previously calculated followPos sets
        // to implement rule chaining. NOT described by Aho
        //
        // -----------------------------------------------------------------------------
        internal void CalcChainedFollowPos(RBBINode tree)
        {
            // Rule chaining: whenever a leaf that can end a match has the same
            // value (character class) as a leaf that can start a match, the
            // starting leaf's followPos is merged into the ending leaf's
            // followPos.

            // Collect every end-marker node and every leaf node of the tree.
            IList endMarks = new ArrayList();
            tree.FindNodes(endMarks, IBM.ICU.Text.RBBINode.endMark);
            IList leaves = new ArrayList();
            tree.FindNodes(leaves, IBM.ICU.Text.RBBINode.leafChar);

            // The nodes that can start a match are the FirstPosition() of the
            // part of the tree that corresponds to the user-written rules.
            // See the tree description in bofFixup().
            RBBINode userRuleRoot = fRB.fSetBuilder.SawBOF()
                                    ? tree.fLeftChild.fRightChild
                                    : tree;
            IBM.ICU.Impl.Assert.Assrt(userRuleRoot != null);
            ILOG.J2CsMapping.Collections.ISet matchStartNodes = userRuleRoot.fFirstPosSet;

            // Visit every leaf node.
            for (IIterator leafIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(leaves.GetEnumerator());
                 leafIt.HasNext();)
            {
                RBBINode candidate = (RBBINode)leafIt.Next();

                // A leaf marks an overall rule match position when its
                // followPos set contains one of the end-marker nodes.
                bool endsMatch = false;
                for (IIterator markIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(endMarks.GetEnumerator());
                     !endsMatch && markIt.HasNext();)
                {
                    RBBINode marker = (RBBINode)markIt.Next();
                    endsMatch = ILOG.J2CsMapping.Collections.Collections.Contains(marker, candidate.fFollowPos);
                }
                if (!endsMatch)
                {
                    // Not an end node; move on to the next leaf.
                    continue;
                }

                // Line Break specific hack: never chain from a node whose val
                // corresponds to the $CM char class.
                // TODO: Add rule syntax for this behavior, and move the
                // specifics out of here and into the rule file.
                if (fRB.fLBCMNoChain)
                {
                    // -1 occurs with sets containing only the {eof} marker string.
                    int firstChar = this.fRB.fSetBuilder.GetFirstChar(candidate.fVal);
                    if (firstChar != -1
                        && IBM.ICU.Lang.UCharacter.GetIntPropertyValue(firstChar,
                                                                       IBM.ICU.Lang.UProperty_Constants.LINE_BREAK)
                           == IBM.ICU.Lang.UCharacter.LineBreak.COMBINING_MARK)
                    {
                        continue;
                    }
                }

                // Among the nodes that can start a match, find the leaves with
                // the same char class as this ending node.
                for (IIterator startIt = new ILOG.J2CsMapping.Collections.IteratorAdapter(matchStartNodes.GetEnumerator());
                     startIt.HasNext();)
                {
                    RBBINode starter = (RBBINode)startIt.Next();
                    if (starter.fType == IBM.ICU.Text.RBBINode.leafChar && candidate.fVal == starter.fVal)
                    {
                        // The end of one possible match has the same val as
                        // the start of another. Copying the starter's followPos
                        // into the ending node's followPos lets a match state at
                        // the ending node transition to the second char of a
                        // match that begins with the starter.
                        ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(starter.fFollowPos, candidate.fFollowPos);
                    }
                }
            }
        }
示例#23
0
	    // ----------------------------------------------------------------------------------------
	    //
	    // doParseAction Do some action during rule parsing.
	    // Called by the parse state machine.
	    // Actions build the parse tree and Unicode Sets,
	    // and maintain the parse stack for nested expressions.
	    //
	    // ----------------------------------------------------------------------------------------
	    internal bool DoParseActions(int action) {
	        // Executes a single parse-table action on behalf of the rule-parsing
	        // state machine.
	        //
	        // action: one of the RBBIRuleParseTable.do* constants.
	        // Returns false when the caller's state machine loop should stop
	        // (most error actions, doExit, or an unrecognized action value);
	        // returns true otherwise.
	        RBBINode n = null;
	
	        bool returnVal = true;
	
	        switch (action) {
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExprStart:
	            PushNewNode(IBM.ICU.Text.RBBINode.opStart);
	            fRuleNum++;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExprOrOperator: {
	            FixOpStack(IBM.ICU.Text.RBBINode.precOpCat);
	            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
	            RBBINode orNode = PushNewNode(IBM.ICU.Text.RBBINode.opOr);
	            orNode.fLeftChild = operandNode;
	            operandNode.fParent = orNode;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExprCatOperator:
	            // Concatenation operator.
	            // For the implicit concatenation of adjacent terms in an expression
	            // that are not separated by any other operator. The action is
	            // invoked between the actions for the two terms.
	        {
	            FixOpStack(IBM.ICU.Text.RBBINode.precOpCat);
	            RBBINode operandNode_0 = fNodeStack[fNodeStackPtr--];
	            RBBINode catNode = PushNewNode(IBM.ICU.Text.RBBINode.opCat);
	            catNode.fLeftChild = operandNode_0;
	            operandNode_0.fParent = catNode;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doLParen:
	            // Open Paren.
	            // The openParen node is a dummy operation type with a low
	            // precedence, which has the effect of ensuring that any real
	            // binary op that follows within the parens binds more tightly to
	            // the operands than stuff outside of the parens.
	            PushNewNode(IBM.ICU.Text.RBBINode.opLParen);
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExprRParen:
	            FixOpStack(IBM.ICU.Text.RBBINode.precLParen);
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doNOP:
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doStartAssign:
	            // We've just scanned "$variable = "
	            // The top of the node stack has the $variable ref node.
	
	            // Save the start position of the RHS text in the StartExpression
	            // node that precedes the $variableReference node on the stack.
	            // This will eventually be used when saving the full $variable
	            // replacement text as a string.
	            n = fNodeStack[fNodeStackPtr - 1];
	            n.fFirstPos = fNextIndex; // move past the '='
	
	            // Push a new start-of-expression node; needed to keep parse of the
	            // RHS expression happy.
	            PushNewNode(IBM.ICU.Text.RBBINode.opStart);
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doEndAssign: {
	            // We have reached the end of an assignment statement.
	            // Current scan char is the ';' that terminates the assignment.
	
	            // Terminate expression, leaves expression parse tree rooted in TOS
	            // node.
	            FixOpStack(IBM.ICU.Text.RBBINode.precStart);
	
	            RBBINode startExprNode = fNodeStack[fNodeStackPtr - 2];
	            RBBINode varRefNode = fNodeStack[fNodeStackPtr - 1];
	            RBBINode RHSExprNode = fNodeStack[fNodeStackPtr];
	
	            // Save original text of right side of assignment, excluding the
	            // terminating ';', in the root of the node for the
	            // right-hand-side expression.
	            RHSExprNode.fFirstPos = startExprNode.fFirstPos;
	            RHSExprNode.fLastPos = fScanIndex;
	            // fRB.fRules.extractBetween(RHSExprNode.fFirstPos,
	            // RHSExprNode.fLastPos, RHSExprNode.fText);
	            RHSExprNode.fText = fRB.fRules.Substring(RHSExprNode.fFirstPos,(RHSExprNode.fLastPos)-(RHSExprNode.fFirstPos));
	
	            // Expression parse tree becomes l. child of the $variable reference
	            // node.
	            varRefNode.fLeftChild = RHSExprNode;
	            RHSExprNode.fParent = varRefNode;
	
	            // Make a symbol table entry for the $variableRef node.
	            fSymbolTable.AddEntry(varRefNode.fText, varRefNode);
	
	            // Clean up the stack.
	            fNodeStackPtr -= 3;
	            break;
	        }
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doEndOfRule: {
	            FixOpStack(IBM.ICU.Text.RBBINode.precStart); // Terminate expression, leaves
	                                            // expression
	
	            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("rtree") >= 0) {
	                PrintNodeStack("end of rule");
	            }
	            IBM.ICU.Impl.Assert.Assrt(fNodeStackPtr == 1);
	
	            // If this rule includes a look-ahead '/', add an endMark node to
	            // the expression tree.
	            if (fLookAheadRule) {
	                RBBINode thisRule = fNodeStack[fNodeStackPtr];
	                RBBINode endNode = PushNewNode(IBM.ICU.Text.RBBINode.endMark);
	                RBBINode catNode_1 = PushNewNode(IBM.ICU.Text.RBBINode.opCat);
	                fNodeStackPtr -= 2;
	                catNode_1.fLeftChild = thisRule;
	                catNode_1.fRightChild = endNode;
	                fNodeStack[fNodeStackPtr] = catNode_1;
	                endNode.fVal = fRuleNum;
	                endNode.fLookAheadEnd = true;
	            }
	
	            // All rule expressions are ORed together.
	            // The ';' that terminates an expression really just functions as a
	            // '|' with a low operator precedence.
	            //
	            // Each of the four sets of rules are collected separately.
	            // (forward, reverse, safe_forward, safe_reverse)
	            // OR this rule into the appropriate group of them.
	            //
	
	            int destRules = ((fReverseRule) ? IBM.ICU.Text.RBBIRuleBuilder.fReverseTree
	                    : fRB.fDefaultTree);
	
	            if (fRB.fTreeRoots[destRules] != null) {
	                // This is not the first rule encountered.
	                // OR previous stuff (from *destRules)
	                // with the current rule expression (on the Node Stack)
	                // with the resulting OR expression going to *destRules
	                //
	                RBBINode thisRule_2 = fNodeStack[fNodeStackPtr];
	                RBBINode prevRules = fRB.fTreeRoots[destRules];
	                RBBINode orNode_3 = PushNewNode(IBM.ICU.Text.RBBINode.opOr);
	                orNode_3.fLeftChild = prevRules;
	                prevRules.fParent = orNode_3;
	                orNode_3.fRightChild = thisRule_2;
	                thisRule_2.fParent = orNode_3;
	                fRB.fTreeRoots[destRules] = orNode_3;
	            } else {
	                // This is the first rule encountered (for this direction).
	                // Just move its parse tree from the stack to *destRules.
	                fRB.fTreeRoots[destRules] = fNodeStack[fNodeStackPtr];
	            }
	            fReverseRule = false; // in preparation for the next rule.
	            fLookAheadRule = false;
	            fNodeStackPtr = 0;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doRuleError:
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
	            returnVal = false;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doVariableNameExpectedErr:
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
	            break;
	
	        //
	        // Unary operands + ? *
	        // These all appear after the operand to which they apply.
	        // When we hit one, the operand (may be a whole sub expression)
	        // will be on the top of the stack.
	        // Unary Operator becomes TOS, with the old TOS as its one child.
	        case IBM.ICU.Text.RBBIRuleParseTable.doUnaryOpPlus: {
	            RBBINode operandNode_4 = fNodeStack[fNodeStackPtr--];
	            RBBINode plusNode = PushNewNode(IBM.ICU.Text.RBBINode.opPlus);
	            plusNode.fLeftChild = operandNode_4;
	            operandNode_4.fParent = plusNode;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doUnaryOpQuestion: {
	            RBBINode operandNode_5 = fNodeStack[fNodeStackPtr--];
	            RBBINode qNode = PushNewNode(IBM.ICU.Text.RBBINode.opQuestion);
	            qNode.fLeftChild = operandNode_5;
	            operandNode_5.fParent = qNode;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doUnaryOpStar: {
	            RBBINode operandNode_6 = fNodeStack[fNodeStackPtr--];
	            RBBINode starNode = PushNewNode(IBM.ICU.Text.RBBINode.opStar);
	            starNode.fLeftChild = operandNode_6;
	            operandNode_6.fParent = starNode;
	        }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doRuleChar:
	            // A "Rule Character" is any single character that is a literal part
	            // of the regular expression. Like a, b and c in the expression
	            // "(abc*) | [:L:]"
	            // These are pretty uncommon in break rules; the terms are more
	            // commonly sets. To keep things uniform, treat these characters
	            // as sets that just happen to contain only one character.
	        {
	            n = PushNewNode(IBM.ICU.Text.RBBINode.setRef);
	            // fC.fChar is an integer character value. A plain (char) cast keeps
	            // only its low 16 bits, so a value above U+FFFF would silently
	            // become an unrelated BMP character. Values in the supplementary
	            // range are therefore converted to their surrogate pair; all
	            // other values keep the original cast, so BMP input (including
	            // lone surrogates, which ConvertFromUtf32 would reject) behaves
	            // exactly as before.
	            String s = (fC.fChar > 0xFFFF && fC.fChar <= 0x10FFFF)
	                    ? char.ConvertFromUtf32(fC.fChar)
	                    : (new StringBuilder().Append((char) fC.fChar)).ToString();
	            FindSetFor(s, n, null);
	            n.fFirstPos = fScanIndex;
	            n.fLastPos = fNextIndex;
	            n.fText = fRB.fRules.Substring(n.fFirstPos,(n.fLastPos)-(n.fFirstPos));
	            break;
	        }
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doDotAny:
	            // scanned a ".", meaning match any single character.
	        {
	            n = PushNewNode(IBM.ICU.Text.RBBINode.setRef);
	            FindSetFor(kAny, n, null);
	            n.fFirstPos = fScanIndex;
	            n.fLastPos = fNextIndex;
	            n.fText = fRB.fRules.Substring(n.fFirstPos,(n.fLastPos)-(n.fFirstPos));
	            break;
	        }
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doSlash:
	            // Scanned a '/', which identifies a look-ahead break position in a
	            // rule.
	            n = PushNewNode(IBM.ICU.Text.RBBINode.lookAhead);
	            n.fVal = fRuleNum;
	            n.fFirstPos = fScanIndex;
	            n.fLastPos = fNextIndex;
	            n.fText = fRB.fRules.Substring(n.fFirstPos,(n.fLastPos)-(n.fFirstPos));
	            fLookAheadRule = true;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doStartTagValue:
	            // Scanned a '{', the opening delimiter for a tag value within a
	            // rule.
	            n = PushNewNode(IBM.ICU.Text.RBBINode.tag);
	            n.fVal = 0;
	            n.fFirstPos = fScanIndex;
	            n.fLastPos = fNextIndex;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doTagDigit:
	            // Just scanned a decimal digit that's part of a tag value;
	            // accumulate it into the tag node's value.
	        {
	            n = fNodeStack[fNodeStackPtr];
	            int v = ILOG.J2CsMapping.Util.Character.Digit((char) fC.fChar,10);
	            n.fVal = n.fVal * 10 + v;
	            break;
	        }
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doTagValue:
	            n = fNodeStack[fNodeStackPtr];
	            n.fLastPos = fNextIndex;
	            n.fText = fRB.fRules.Substring(n.fFirstPos,(n.fLastPos)-(n.fFirstPos));
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doTagExpectedError:
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_MALFORMED_RULE_TAG);
	            returnVal = false;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doOptionStart:
	            // Scanning a !!option. At the start of string.
	            fOptionStart = fScanIndex;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doOptionEnd: {
	            // The option name is the rule text between the recorded option
	            // start and the current scan position.
	            String opt = fRB.fRules.Substring(fOptionStart,(fScanIndex)-(fOptionStart));
	            if (opt.Equals("chain")) {
	                fRB.fChainRules = true;
	            } else if (opt.Equals("LBCMNoChain")) {
	                fRB.fLBCMNoChain = true;
	            } else if (opt.Equals("forward")) {
	                fRB.fDefaultTree = IBM.ICU.Text.RBBIRuleBuilder.fForwardTree;
	            } else if (opt.Equals("reverse")) {
	                fRB.fDefaultTree = IBM.ICU.Text.RBBIRuleBuilder.fReverseTree;
	            } else if (opt.Equals("safe_forward")) {
	                fRB.fDefaultTree = IBM.ICU.Text.RBBIRuleBuilder.fSafeFwdTree;
	            } else if (opt.Equals("safe_reverse")) {
	                fRB.fDefaultTree = IBM.ICU.Text.RBBIRuleBuilder.fSafeRevTree;
	            } else if (opt.Equals("lookAheadHardBreak")) {
	                fRB.fLookAheadHardBreak = true;
	            } else {
	                Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_UNRECOGNIZED_OPTION);
	            }
	            break;
	        }
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doReverseDir:
	            fReverseRule = true;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doStartVariableName:
	            n = PushNewNode(IBM.ICU.Text.RBBINode.varRef);
	            n.fFirstPos = fScanIndex;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doEndVariableName:
	            n = fNodeStack[fNodeStackPtr];
	            if (n == null || n.fType != IBM.ICU.Text.RBBINode.varRef) {
	                Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
	                break;
	            }
	            n.fLastPos = fScanIndex;
	            // The stored name starts one character after fFirstPos.
	            n.fText = fRB.fRules.Substring(n.fFirstPos + 1,(n.fLastPos)-(n.fFirstPos + 1));
	            // Look the newly scanned name up in the symbol table.
	            // If there's an entry, set the l. child of the var ref to the
	            // replacement expression.
	            // (We also pass through here when scanning assignments, but no harm
	            // is done, other than a slight wasted effort that seems hard to
	            // avoid. Lookup will be null)
	            n.fLeftChild = fSymbolTable.LookupNode(n.fText);
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doCheckVarDef:
	            // A variable reference without a left child has no definition.
	            n = fNodeStack[fNodeStackPtr];
	            if (n.fLeftChild == null) {
	                Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_UNDEFINED_VARIABLE);
	                returnVal = false;
	            }
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExprFinished:
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doRuleErrorAssignExpr:
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_ASSIGN_ERROR);
	            returnVal = false;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doExit:
	            returnVal = false;
	            break;
	
	        case IBM.ICU.Text.RBBIRuleParseTable.doScanUnicodeSet:
	            ScanSet();
	            break;
	
	        default:
	            // Unknown action value: treated as an internal error.
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
	            returnVal = false;
	            break;
	        }
	        return returnVal;
	    }
示例#24
0
        // /CLOVER:ON

        // ------------------------------------------------------------------------
        //
        // printRangeGroups A debugging function.
        // dump out all of the range groups.
        //
        // ------------------------------------------------------------------------
        // /CLOVER:OFF
        internal void PrintRangeGroups()
        {
            // Debug dump of the range list, one entry per group. A group number
            // is a range's fNum with the 0x4000 bit masked off; a group is
            // printed only when its number is higher than any printed so far.
            int highestGroupPrinted = 0;

            System.Console.Out.Write("\nRanges grouped by Unicode Set Membership...\n");
            for (RBBISetBuilder.RangeDescriptor current = fRangeList; current != null; current = current.fNext)
            {
                int group = current.fNum & 0xbfff;
                if (group <= highestGroupPrinted)
                {
                    continue;
                }
                highestGroupPrinted = group;

                // Group number, padded to two columns.
                if (group < 10)
                {
                    System.Console.Out.Write(" ");
                }
                System.Console.Out.Write(group + " ");

                // Ranges with the 0x4000 bit set are labelled <DICT>.
                if ((current.fNum & 0x4000) != 0)
                {
                    System.Console.Out.Write(" <DICT> ");
                }

                // Name of each set that includes this range: the text of the
                // variable-reference node two levels up, or "anon" if there is
                // no such node.
                for (int setIx = 0; setIx < current.fIncludesSets.Count; setIx++)
                {
                    RBBINode usetNode   = (RBBINode)current.fIncludesSets[setIx];
                    RBBINode setRefNode = usetNode.fParent;
                    RBBINode varRefNode = (setRefNode != null) ? setRefNode.fParent : null;
                    String   label      = (varRefNode != null && varRefNode.fType == IBM.ICU.Text.RBBINode.varRef)
                                          ? varRefNode.fText
                                          : "anon";
                    System.Console.Out.Write(label);
                    System.Console.Out.Write(" ");
                }

                // Every range from here onward carrying the same fNum, five
                // per output line.
                int printed = 0;
                for (RBBISetBuilder.RangeDescriptor member = current; member != null; member = member.fNext)
                {
                    if (member.fNum != current.fNum)
                    {
                        continue;
                    }
                    if (printed % 5 == 0)
                    {
                        System.Console.Out.Write("\n    ");
                    }
                    printed++;
                    IBM.ICU.Text.RBBINode.PrintHex((int)member.fStartChar, -1);
                    System.Console.Out.Write("-");
                    IBM.ICU.Text.RBBINode.PrintHex((int)member.fEndChar, 0);
                }
                System.Console.Out.Write("\n");
            }
            System.Console.Out.Write("\n");
        }