Beispiel #1
0
        //-----------------------------------------------------------------------------
        //
        //  mergeRuleStatusVals
        //
        //      Allocate positions in the  global array of rule status {tag} values
        //
        //      The RBBI runtime uses an array of {sets of status values} that can
        //      be returned for boundaries.  Each accepting state that has non-zero
        //      status includes an index into this array.  The format of the array
        //      is
        //           Num of status values in group 1
        //              status val
        //              status val
        //              ...
        //           Num of status vals in group 2
        //              status val
        //              status val
        //              ...
        //           etc.
        //
        //
        //-----------------------------------------------------------------------------

        internal virtual void MergeRuleStatusVals()
        {
            //
            //  Outline:
            //
            //    for each state in this state table
            //       if the state's status tag set is already in the global status-set map
            //           reuse the recorded index
            //       else
            //           append the tag set to the global status value list and
            //           record the index at which it was appended.
            //       store the index in the state descriptor.
            //

            // Pre-load a single tag group of {0} at index 0 of the table.
            //   It serves as the default for rule sets with no explicit tagging
            //   (empty tag set) and for rule sets with explicit tagging of {0}.
            if (fRB.fRuleStatusVals.Count == 0)
            {
                fRB.fRuleStatusVals.Add(1);    // Num of statuses in group
                fRB.fRuleStatusVals.Add(0);    //   and our single status of zero

                int izero = 0;

                // The empty tag set maps to the pre-loaded group.
                SortedSet<int> s0 = new SortedSet<int>();
                fRB.fStatusSets[s0] = izero;

                // The explicit {0} tag set maps to the same pre-loaded group.
                // (Previously s0 was registered a second time and s1 was dropped,
                //  so states tagged {0} appended a redundant {1, 0} group.)
                SortedSet<int> s1 = new SortedSet<int>();
                s1.Add(izero);
                fRB.fStatusSets[s1] = izero;
            }

            //    For each state, check whether the state's status tag values are
            //       already entered into the status values array, and add them if not.
            for (int n = 0; n < fDStates.Count; n++)
            {
                RBBIStateDescriptor sd = fDStates[n];
                SortedSet<int> statusVals = sd.fTagVals;
                int? arrayIndexI = fRB.fStatusSets.Get(statusVals);
                if (arrayIndexI == null)
                {
                    // This is the first encounter of this set of status values.
                    //   Add them to the statusSets map. This map associates
                    //   the set of status values with an index in the runtime status
                    //   values array.
                    arrayIndexI = fRB.fRuleStatusVals.Count;
                    fRB.fStatusSets[statusVals] = arrayIndexI;

                    // Add the new set of status values to the vector of values that
                    //   will eventually become the array used by the runtime engine:
                    //   first the group size, then the values themselves.
                    fRB.fRuleStatusVals.Add(statusVals.Count);
                    fRB.fRuleStatusVals.AddRange(statusVals);
                }

                // Save the runtime array index back into the state descriptor.
                sd.fTagsIdx = arrayIndexI.Value; // ICU4N NOTE: At this point the value cannot be null
            }
        }
Beispiel #2
0
        //-----------------------------------------------------------------------------
        //
        //   flagAcceptingStates    Identify accepting states.
        //                          First get a list of all of the end marker nodes.
        //                          Then, for each state s,
        //                              if s contains one of the end marker nodes in its list of tree positions then
        //                                  s is an accepting state.
        //
        //-----------------------------------------------------------------------------
        internal virtual void FlagAcceptingStates()
        {
            // Collect every end-marker node of the tree this builder works on.
            IList<RBBINode> markers = new JCG.List<RBBINode>();
            fRB.fTreeRoots[fRootIx].FindNodes(markers, RBBINode.endMark);

            foreach (RBBINode marker in markers)
            {
                for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
                {
                    RBBIStateDescriptor state = fDStates[stateIx];
                    if (!state.fPositions.Contains(marker))
                    {
                        // This state does not include the end marker; nothing to flag.
                        continue;
                    }

                    // Any non-zero fAccepting means the state is accepting; the value is
                    // what is returned to the user as the break status. When the marker
                    // carries no value of its own, -1 is used.
                    if (state.fAccepting == 0)
                    {
                        // Not yet marked as accepting. Do it now.
                        state.fAccepting = (marker.fVal != 0) ? marker.fVal : -1;
                    }

                    if (state.fAccepting == -1 && marker.fVal != 0)
                    {
                        // Both lookahead and non-lookahead accepting for this state.
                        // Favor the look-ahead. Expedient for line break.
                        // TODO:  need a more elegant resolution for conflicting rules.
                        state.fAccepting = marker.fVal;
                    }
                    // Otherwise fAccepting already holds a value other than 0 or -1
                    // and is left as it is.

                    // If the end marker node is from a look-ahead rule, set the
                    // fLookAhead field for this state also.
                    if (marker.fLookAheadEnd)
                    {
                        // TODO:  don't change value if already set?
                        // TODO:  allow for more than one active look-ahead rule in engine.
                        //        Make value here an index to a side array in engine?
                        state.fLookAhead = state.fAccepting;
                    }
                }
            }
        }
Beispiel #3
0
        //-----------------------------------------------------------------------------
        //
        //    flagTaggedStates
        //
        //-----------------------------------------------------------------------------
        internal virtual void FlagTaggedStates()
        {
            // Gather all tag nodes from the tree this builder works on.
            IList<RBBINode> taggedNodes = new JCG.List<RBBINode>();
            fRB.fTreeRoots[fRootIx].FindNodes(taggedNodes, RBBINode.tag);

            // For every tag node, visit every state (row in the state table) and
            // record the tag value on each state whose position set includes the node.
            foreach (RBBINode node in taggedNodes)
            {
                for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
                {
                    RBBIStateDescriptor state = fDStates[stateIx];
                    if (!state.fPositions.Contains(node))
                    {
                        continue;
                    }

                    state.fTagVals.Add(node.fVal);
                }
            }
        }
Beispiel #4
0
        //-----------------------------------------------------------------------------
        //
        //    flagLookAheadStates   Very similar to flagAcceptingStates, above.
        //
        //-----------------------------------------------------------------------------
        internal virtual void FlagLookAheadStates()
        {
            // Gather all look-ahead nodes from the tree this builder works on.
            IList<RBBINode> laNodes = new JCG.List<RBBINode>();
            fRB.fTreeRoots[fRootIx].FindNodes(laNodes, RBBINode.lookAhead);

            // Each state whose position set includes a look-ahead node takes that
            // node's value as its fLookAhead. Nodes are processed in list order, so
            // a later node overwrites the value written for an earlier one.
            foreach (RBBINode node in laNodes)
            {
                for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
                {
                    RBBIStateDescriptor state = fDStates[stateIx];
                    if (!state.fPositions.Contains(node))
                    {
                        continue;
                    }

                    state.fLookAhead = node.fVal;
                }
            }
        }
Beispiel #5
0
        //-----------------------------------------------------------------------------
        //
        //   printStates    Debug Function.  Dump the fully constructed state transition table.
        //
        //-----------------------------------------------------------------------------

        internal virtual void PrintStates()
        {
            // Header: title line, then the column captions followed by one
            // numbered column per input character category.
            Console.Out.Write("state |           i n p u t     s y m b o l s \n");
            Console.Out.Write("      | Acc  LA    Tag");
            for (int category = 0; category < fRB.fSetBuilder.NumCharCategories; category++)
            {
                RBBINode.PrintInt32(category, 3);
            }
            Console.Out.Write("\n");

            // Separator rule, extended by three dashes per category column.
            Console.Out.Write("      |---------------");
            for (int category = 0; category < fRB.fSetBuilder.NumCharCategories; category++)
            {
                Console.Out.Write("---");
            }
            Console.Out.Write("\n");

            // One row per state: state number, accepting value, look-ahead value,
            // tag index, then the transition target for each category.
            for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
            {
                RBBIStateDescriptor state = fDStates[stateIx];

                RBBINode.PrintInt32(stateIx, 5);
                Console.Out.Write(" | ");

                RBBINode.PrintInt32(state.fAccepting, 3);
                RBBINode.PrintInt32(state.fLookAhead, 4);
                RBBINode.PrintInt32(state.fTagsIdx, 6);
                Console.Out.Write(" ");

                for (int category = 0; category < fRB.fSetBuilder.NumCharCategories; category++)
                {
                    RBBINode.PrintInt32(state.fDtran[category], 3);
                }
                Console.Out.Write("\n");
            }

            Console.Out.Write("\n\n");
        }
Beispiel #6
0
        //-----------------------------------------------------------------------------
        //
        //   exportTable()    export the state transition table in the ICU4C format.
        //
        //                    Most of the table is 16 bit shorts.  This function exports
        //                    the whole thing as an array of shorts.
        //
        //                    The size of the array must be rounded up to a multiple of
        //                    8 bytes.
        //
        //                    See struct RBBIStateTable in ICU4C, common/rbbidata.h
        //
        //-----------------------------------------------------------------------------

        internal virtual short[] ExportTable()
        {
            // No tree for this table: export an empty array.
            if (fRB.fTreeRoots[fRootIx] == null)
            {
                return new short[0];
            }

            // Category and state counts must stay below 0x7fff.
            Assert.Assrt(fRB.fSetBuilder.NumCharCategories < 0x7fff &&
                         fDStates.Count < 0x7fff);

            int stateCount = fDStates.Count;

            // Row length in shorts: 4 shorts of row header
            // (struct RBBIStateTableRow, header part only) plus one per category.
            int shortsPerRow = 4 + fRB.fSetBuilder.NumCharCategories;

            // GetTableSize() is halved to obtain the table length in shorts.
            int lengthInShorts = GetTableSize() / 2;
            short[] result = new short[lengthInShorts];

            //
            // Header fields. Each is a 32-bit quantity, stored as two
            // consecutive shorts: high half first, then low half.
            //

            // RBBIStateTable.fNumStates
            result[RBBIDataWrapper.NUMSTATES]     = (short)(stateCount.TripleShift(16));
            result[RBBIDataWrapper.NUMSTATES + 1] = (short)(stateCount & 0x0000ffff);

            // RBBIStateTable.fRowLen
            result[RBBIDataWrapper.ROWLEN]     = (short)(shortsPerRow.TripleShift(16));
            result[RBBIDataWrapper.ROWLEN + 1] = (short)(shortsPerRow & 0x0000ffff);

            // RBBIStateTable.fFlags
            int optionBits = 0;
            if (fRB.fLookAheadHardBreak)
            {
                optionBits |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
            }
            if (fRB.fSetBuilder.SawBOF)
            {
                optionBits |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
            }
            result[RBBIDataWrapper.FLAGS]     = (short)(optionBits.TripleShift(16));
            result[RBBIDataWrapper.FLAGS + 1] = (short)(optionBits & 0x0000ffff);

            int categoryCount = fRB.fSetBuilder.NumCharCategories;

            // State rows start at offset 8 and are laid out back to back,
            // shortsPerRow shorts apart.
            int rowStart = 8;
            for (int stateIx = 0; stateIx < stateCount; stateIx++, rowStart += shortsPerRow)
            {
                RBBIStateDescriptor descriptor = fDStates[stateIx];

                Assert.Assrt(descriptor.fAccepting > -32768 && descriptor.fAccepting <= 32767);
                Assert.Assrt(descriptor.fLookAhead > -32768 && descriptor.fLookAhead <= 32767);

                result[rowStart + RBBIDataWrapper.ACCEPTING] = (short)descriptor.fAccepting;
                result[rowStart + RBBIDataWrapper.LOOKAHEAD] = (short)descriptor.fLookAhead;
                result[rowStart + RBBIDataWrapper.TAGIDX]    = (short)descriptor.fTagsIdx;

                for (int category = 0; category < categoryCount; category++)
                {
                    result[rowStart + RBBIDataWrapper.NEXTSTATES + category] = (short)descriptor.fDtran[category];
                }
            }

            return result;
        }
Beispiel #7
0
        //-----------------------------------------------------------------------------
        //
        //   buildStateTable()    Determine the set of runtime DFA states and the
        //                        transition tables for these states, by the algorithm
        //                        of fig. 3.44 in Aho.
        //
        //                        Most of the comments are quotes of Aho's pseudo-code.
        //
        //-----------------------------------------------------------------------------
        internal virtual void BuildStateTable()
        {
            int maxSymbol = fRB.fSetBuilder.NumCharCategories - 1;

            // State 0 is a dummy "stop" state. It is not part of Aho's algorithm.
            fDStates.Add(new RBBIStateDescriptor(maxSymbol));

            // Initially, the only unmarked state in Dstates is firstpos(root),
            //    where root is the root of the syntax tree for (r)#.
            RBBIStateDescriptor startState = new RBBIStateDescriptor(maxSymbol);
            startState.fPositions.UnionWith(fRB.fTreeRoots[fRootIx].fFirstPosSet);
            fDStates.Add(startState);

            // while there is an unmarked state T in Dstates do begin
            while (true)
            {
                // Pick the first unmarked state, skipping the stop state at index 0.
                RBBIStateDescriptor current = null;
                for (int scan = 1; scan < fDStates.Count; scan++)
                {
                    RBBIStateDescriptor candidate = fDStates[scan];
                    if (!candidate.fMarked)
                    {
                        current = candidate;
                        break;
                    }
                }
                if (current == null)
                {
                    // Every state has been processed.
                    break;
                }

                // mark T;
                current.fMarked = true;

                // for each input symbol a do begin
                for (int symbol = 1; symbol <= maxSymbol; symbol++)
                {
                    // let U be the set of positions that are in followpos(p)
                    //    for some position p in T
                    //    such that the symbol at position p is a;
                    // The set is created lazily, so it stays null when no position matches.
                    ISet<RBBINode> followers = null;
                    foreach (RBBINode position in current.fPositions)
                    {
                        if (position.fType != RBBINode.leafChar || position.fVal != symbol)
                        {
                            continue;
                        }

                        if (followers == null)
                        {
                            followers = new JCG.HashSet<RBBINode>(position.fFollowPos.Count);
                        }
                        followers.UnionWith(position.fFollowPos);
                    }

                    if (followers == null)
                    {
                        // No transition on this symbol; Dtran[T, a] is left untouched.
                        continue;
                    }

                    Assert.Assrt(followers.Count > 0);

                    // if U is not empty and not in DStates then
                    //    look for an existing state with an equal position set ...
                    int target = -1;
                    for (int existing = 0; existing < fDStates.Count; existing++)
                    {
                        if (SetEqualityComparer<RBBINode>.Default.Equals(followers, fDStates[existing].fPositions))
                        {
                            target = existing;
                            break;
                        }
                    }

                    //    ... and add U as an unmarked state to Dstates when there is none.
                    if (target < 0)
                    {
                        RBBIStateDescriptor added = new RBBIStateDescriptor(maxSymbol);
                        added.fPositions = followers;
                        fDStates.Add(added);
                        target = fDStates.Count - 1;
                    }

                    // Dtran[T, a] := U;
                    current.fDtran[symbol] = target;
                }
            }
        }