// -----------------------------------------------------------------------------
//
// flagTaggedStates    Copy tag values onto the states that contain them.
//                     Collects every tag node under this builder's tree root;
//                     for each one, the node's value is added to fTagVals of
//                     every state whose position set contains that node.
//
// -----------------------------------------------------------------------------
internal void FlagTaggedStates()
{
    IList foundTags = new ArrayList();
    fRB.fTreeRoots[fRootIx].FindNodes(foundTags, IBM.ICU.Text.RBBINode.tag);

    // For each tag node t ...
    foreach (RBBINode tag in foundTags)
    {
        // ... visit each state s (row in the state table).
        for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
        {
            RBBITableBuilder.RBBIStateDescriptor state =
                (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];

            // Skip states that do not include the tag node t.
            if (!ILOG.J2CsMapping.Collections.Collections.Contains(tag, state.fPositions))
            {
                continue;
            }

            ILOG.J2CsMapping.Collections.Generics.Collections.Add(state.fTagVals, ((int)(tag.fVal)));
        }
    }
}
// -----------------------------------------------------------------------------
//
// flagAcceptingStates    Identify accepting states.
//                        Gather all end marker nodes under the tree root, then
//                        mark every state whose list of tree positions contains
//                        one of them as accepting.
//
//                        Any non-zero fAccepting means "accepting"; the value is
//                        what is returned to the user as the break status, with
//                        -1 standing in when the end marker carries no value.
//
// -----------------------------------------------------------------------------
internal void FlagAcceptingStates()
{
    IList endMarks = new ArrayList();
    fRB.fTreeRoots[fRootIx].FindNodes(endMarks, IBM.ICU.Text.RBBINode.endMark);

    foreach (RBBINode marker in endMarks)
    {
        for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
        {
            RBBITableBuilder.RBBIStateDescriptor state =
                (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];

            if (!ILOG.J2CsMapping.Collections.Collections.Contains(marker, state.fPositions))
            {
                continue;
            }

            if (state.fAccepting == 0)
            {
                // Not marked as accepting yet: take the marker's value, or
                // force -1 when the marker has no value of its own.
                state.fAccepting = (marker.fVal == 0) ? -1 : marker.fVal;
            }

            if (state.fAccepting == -1 && marker.fVal != 0)
            {
                // Both look-ahead and non-look-ahead accepting for this state.
                // Favor the look-ahead. Expedient for line break.
                // TODO: need a more elegant resolution for conflicting rules.
                state.fAccepting = marker.fVal;
            }

            // Otherwise fAccepting already holds a value other than 0 or -1
            // and is left alone.

            if (marker.fLookAheadEnd)
            {
                // The end marker comes from a look-ahead rule, so the state's
                // fLookAhead field is set as well.
                // TODO: don't change value if already set?
                // TODO: allow for more than one active look-ahead rule in
                //       engine. Make value here an index to a side array in
                //       engine?
                state.fLookAhead = state.fAccepting;
            }
        }
    }
}
// -----------------------------------------------------------------------------
//
// flagLookAheadStates    Same scan as flagAcceptingStates, but over look-ahead
//                        nodes: every state whose position set contains a
//                        look-ahead node gets that node's value in fLookAhead.
//
// -----------------------------------------------------------------------------
internal void FlagLookAheadStates()
{
    IList foundLookAheads = new ArrayList();
    fRB.fTreeRoots[fRootIx].FindNodes(foundLookAheads, IBM.ICU.Text.RBBINode.lookAhead);

    foreach (RBBINode lookAhead in foundLookAheads)
    {
        for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
        {
            RBBITableBuilder.RBBIStateDescriptor state =
                (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];

            if (!ILOG.J2CsMapping.Collections.Collections.Contains(lookAhead, state.fPositions))
            {
                continue;
            }

            state.fLookAhead = lookAhead.fVal;
        }
    }
}
// -----------------------------------------------------------------------------
//
// printStates    Debug function. Dumps the fully constructed state transition
//                table to the console: a two-line header, then one row per
//                state showing its accepting value, look-ahead value, tag
//                index and the next state for each input category.
//
// -----------------------------------------------------------------------------
internal void PrintStates()
{
    System.IO.TextWriter console = System.Console.Out;
    int categoryCount = fRB.fSetBuilder.GetNumCharCategories();

    // Header line 1: column titles followed by the input category numbers.
    console.Write("state | i n p u t s y m b o l s \n");
    console.Write(" | Acc LA Tag");
    for (int category = 0; category < categoryCount; category++)
    {
        IBM.ICU.Text.RBBINode.PrintInt((int)category, 3);
    }
    console.Write("\n");

    // Header line 2: a rule as wide as the columns above.
    console.Write(" |---------------");
    for (int category = 0; category < categoryCount; category++)
    {
        console.Write("---");
    }
    console.Write("\n");

    // One row per state.
    for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
    {
        RBBITableBuilder.RBBIStateDescriptor state =
            (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];

        IBM.ICU.Text.RBBINode.PrintInt(stateIx, 5);
        console.Write(" | ");
        IBM.ICU.Text.RBBINode.PrintInt(state.fAccepting, 3);
        IBM.ICU.Text.RBBINode.PrintInt(state.fLookAhead, 4);
        IBM.ICU.Text.RBBINode.PrintInt(state.fTagsIdx, 6);
        console.Write(" ");

        for (int category = 0; category < categoryCount; category++)
        {
            IBM.ICU.Text.RBBINode.PrintInt(state.fDtran[category], 3);
        }
        console.Write("\n");
    }

    console.Write("\n\n");
}
// -----------------------------------------------------------------------------
//
// exportTable()    Export the state transition table in the ICU4C format.
//
//                  Most of the table is 16 bit shorts, so the whole thing is
//                  exported as an array of shorts.
//
//                  The size of the array must be rounded up to a multiple of
//                  8 bytes.
//
//                  See struct RBBIStateTable in ICU4C, common/rbbidata.h
//
//                  Returns an empty array when this builder's tree root is
//                  null.
//
// -----------------------------------------------------------------------------
internal short[] ExportTable()
{
    if (fRB.fTreeRoots[fRootIx] == null)
    {
        return new short[0];
    }

    int categoryCount = fRB.fSetBuilder.GetNumCharCategories();
    int stateCount = fDStates.Count;
    IBM.ICU.Impl.Assert.Assrt(categoryCount < 0x7fff && stateCount < 0x7fff);

    // Row length in shorts. The "4" is the size of struct RBBIStateTableRow,
    // the row header part only.
    int rowLen = 4 + categoryCount;

    // GetTableSize() is halved here to size the array in shorts.
    short[] table = new short[GetTableSize() / 2];

    //
    // Header fields. Annoying because they really want to be ints, not
    // shorts: each one is written as a high half followed by a low half.
    //

    // RBBIStateTable.fNumStates
    uint stateBits = (uint)stateCount;
    table[IBM.ICU.Text.RBBIDataWrapper.NUMSTATES] = (short)((int)(stateBits >> 16));
    table[IBM.ICU.Text.RBBIDataWrapper.NUMSTATES + 1] = (short)(stateCount & 0x0000ffff);

    // RBBIStateTable.fRowLen
    uint rowLenBits = (uint)rowLen;
    table[IBM.ICU.Text.RBBIDataWrapper.ROWLEN] = (short)((int)(rowLenBits >> 16));
    table[IBM.ICU.Text.RBBIDataWrapper.ROWLEN + 1] = (short)(rowLen & 0x0000ffff);

    // RBBIStateTable.fFlags
    int flags = 0;
    if (fRB.fLookAheadHardBreak)
    {
        flags |= IBM.ICU.Text.RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
    }
    if (fRB.fSetBuilder.SawBOF())
    {
        flags |= IBM.ICU.Text.RBBIDataWrapper.RBBI_BOF_REQUIRED;
    }
    uint flagBits = (uint)flags;
    table[IBM.ICU.Text.RBBIDataWrapper.FLAGS] = (short)((int)(flagBits >> 16));
    table[IBM.ICU.Text.RBBIDataWrapper.FLAGS + 1] = (short)(flags & 0x0000ffff);

    // State rows start at short offset 8 and are rowLen shorts apart.
    int rowStart = 8;
    for (int stateIx = 0; stateIx < stateCount; stateIx++, rowStart += rowLen)
    {
        RBBITableBuilder.RBBIStateDescriptor state =
            (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];

        IBM.ICU.Impl.Assert.Assrt(-32768 < state.fAccepting && state.fAccepting <= 32767);
        IBM.ICU.Impl.Assert.Assrt(-32768 < state.fLookAhead && state.fLookAhead <= 32767);

        table[rowStart + IBM.ICU.Text.RBBIDataWrapper.ACCEPTING] = (short)state.fAccepting;
        table[rowStart + IBM.ICU.Text.RBBIDataWrapper.LOOKAHEAD] = (short)state.fLookAhead;
        table[rowStart + IBM.ICU.Text.RBBIDataWrapper.TAGIDX] = (short)state.fTagsIdx;

        int nextStatesStart = rowStart + IBM.ICU.Text.RBBIDataWrapper.NEXTSTATES;
        for (int category = 0; category < categoryCount; category++)
        {
            table[nextStatesStart + category] = (short)state.fDtran[category];
        }
    }

    return table;
}
// -----------------------------------------------------------------------------
//
// mergeRuleStatusVals
//
//      Allocate positions in the global array of rule status {tag} values.
//
//      The RBBI runtime uses an array of {sets of status values} that can
//      be returned for boundaries. Each accepting state that has non-zero
//      status includes an index into this array. The format of the array
//      is
//           Num of status values in group 1
//              status val
//              status val
//              ...
//           Num of status vals in group 2
//              status val
//              status val
//              ...
//           etc.
//
//      On return, every state's fTagsIdx holds the index of its group in
//      fRB.fRuleStatusVals.
//
// -----------------------------------------------------------------------------
internal void MergeRuleStatusVals()
{
    //
    // The basic outline of what happens here is this...
    //
    //   for each state in this state table
    //      if the status tag list for this state is in the global statuses list
    //          record where and
    //          continue with the next state
    //      else
    //          add the tag list for this state to the global list.
    //

    // Pre-load a single tag of {0} into the table.
    // We will need this as a default, for rule sets with no explicit
    // tagging, or with explicit tagging of {0}.
    if (fRB.fRuleStatusVals.Count == 0)
    {
        ILOG.J2CsMapping.Collections.Generics.Collections.Add(fRB.fRuleStatusVals, ((int)(1))); // Num of statuses in group
        ILOG.J2CsMapping.Collections.Generics.Collections.Add(fRB.fRuleStatusVals, ((int)(0))); // and our single status of zero

        Int32 izero = ((int)(0));

        // The empty set (no explicit tagging) maps to the default group.
        SortedSet s0 = new SortedSet();
        ILOG.J2CsMapping.Collections.Collections.Put(fRB.fStatusSets, s0, izero);

        // The set {0} (explicit tagging of {0}) maps to the same group.
        // This used to register s0 a second time, leaving s1 unused, so a
        // state tagged {0} was given a duplicate group of its own.
        SortedSet s1 = new SortedSet();
        ILOG.J2CsMapping.Collections.Generics.Collections.Add(s1, izero);
        ILOG.J2CsMapping.Collections.Collections.Put(fRB.fStatusSets, s1, izero);
    }

    // For each state, check whether the state's status tag values are
    // already entered into the status values array, and add them if not.
    for (int n = 0; n < fDStates.Count; n++)
    {
        RBBITableBuilder.RBBIStateDescriptor sd = (RBBITableBuilder.RBBIStateDescriptor)fDStates[n];
        ILOG.J2CsMapping.Collections.ISet statusVals = sd.fTagVals;

        // Keep the lookup result as an object until it is known to be
        // non-null: Int32 is a value type, so unboxing a null result throws,
        // and an Int32 compared against null is never equal.
        // NOTE(review): assumes Get returns null for an absent key, which is
        // what the original null test relied on - confirm.
        object existingIndex = ILOG.J2CsMapping.Collections.Collections.Get(fRB.fStatusSets, statusVals);
        int arrayIndex;

        if (existingIndex == null)
        {
            // This is the first encounter of this set of status values.
            // Add them to the statusSets map. This map associates the set
            // of status values with an index in the runtime status values
            // array.
            arrayIndex = fRB.fRuleStatusVals.Count;
            ILOG.J2CsMapping.Collections.Collections.Put(fRB.fStatusSets, statusVals, arrayIndex);

            // Add the new set of status values to the vector of values that
            // will eventually become the array used by the runtime engine:
            // first the group size, then each value.
            ILOG.J2CsMapping.Collections.Generics.Collections.Add(fRB.fRuleStatusVals, ((int)(statusVals.Count)));
            IIterator it = new ILOG.J2CsMapping.Collections.IteratorAdapter(statusVals.GetEnumerator());
            while (it.HasNext())
            {
                ILOG.J2CsMapping.Collections.Generics.Collections.Add(fRB.fRuleStatusVals, it.Next());
            }
        }
        else
        {
            arrayIndex = (Int32)existingIndex;
        }

        // Save the runtime array index back into the state descriptor.
        sd.fTagsIdx = arrayIndex;
    }
}
// -----------------------------------------------------------------------------
//
// buildStateTable()    Determine the set of runtime DFA states and the
//                      transition tables for these states, by the algorithm
//                      of fig. 3.44 in Aho.
//
//                      Most of the comments are quotes of Aho's pseudo-code.
//
// -----------------------------------------------------------------------------
internal void BuildStateTable()
{
    int maxSymbol = fRB.fSetBuilder.GetNumCharCategories() - 1;

    // Add a dummy state 0 - the stop state. Not from Aho.
    RBBITableBuilder.RBBIStateDescriptor stopState =
        new RBBITableBuilder.RBBIStateDescriptor(maxSymbol);
    ILOG.J2CsMapping.Collections.Generics.Collections.Add(fDStates, stopState);

    // initially, the only unmarked state in Dstates is firstpos(root),
    // where root is the root of the syntax tree for (r)#;
    RBBITableBuilder.RBBIStateDescriptor startState =
        new RBBITableBuilder.RBBIStateDescriptor(maxSymbol);
    ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(
        fRB.fTreeRoots[fRootIx].fFirstPosSet, startState.fPositions);
    ILOG.J2CsMapping.Collections.Generics.Collections.Add(fDStates, startState);

    // while there is an unmarked state T in Dstates do begin
    while (true)
    {
        // Find the first unmarked state; state 0 is never examined.
        RBBITableBuilder.RBBIStateDescriptor current = null;
        for (int scanIx = 1; scanIx < fDStates.Count; scanIx++)
        {
            RBBITableBuilder.RBBIStateDescriptor candidate =
                (RBBITableBuilder.RBBIStateDescriptor)fDStates[scanIx];
            if (!candidate.fMarked)
            {
                current = candidate;
                break;
            }
        }
        if (current == null)
        {
            // Every state has been marked: the table is complete.
            break;
        }

        // mark T;
        current.fMarked = true;

        // for each input symbol a do begin
        for (int symbol = 1; symbol <= maxSymbol; symbol++)
        {
            // let U be the set of positions that are in followpos(p)
            // for some position p in T
            // such that the symbol at position p is a;
            ILOG.J2CsMapping.Collections.ISet followSet = null;
            IIterator positions =
                new ILOG.J2CsMapping.Collections.IteratorAdapter(current.fPositions.GetEnumerator());
            while (positions.HasNext())
            {
                RBBINode position = (RBBINode)positions.Next();
                if (position.fType != IBM.ICU.Text.RBBINode.leafChar || position.fVal != symbol)
                {
                    continue;
                }
                if (followSet == null)
                {
                    followSet = new HashedSet();
                }
                ILOG.J2CsMapping.Collections.Generics.Collections.AddAll(position.fFollowPos, followSet);
            }

            // No position in T carries this symbol: Dtran[T, a] is left as is.
            if (followSet == null)
            {
                continue;
            }
            IBM.ICU.Impl.Assert.Assrt(followSet.Count > 0);

            // if U is not empty and not in DStates then
            int target = -1;
            for (int stateIx = 0; stateIx < fDStates.Count; stateIx++)
            {
                RBBITableBuilder.RBBIStateDescriptor existing =
                    (RBBITableBuilder.RBBIStateDescriptor)fDStates[stateIx];
                if (followSet.Equals(existing.fPositions))
                {
                    target = stateIx;
                    break;
                }
            }

            // Add U as an unmarked state to Dstates
            if (target < 0)
            {
                RBBITableBuilder.RBBIStateDescriptor added =
                    new RBBITableBuilder.RBBIStateDescriptor(maxSymbol);
                added.fPositions = followSet;
                ILOG.J2CsMapping.Collections.Generics.Collections.Add(fDStates, added);
                target = fDStates.Count - 1;
            }

            // Dtran[T, a] := U;
            current.fDtran[symbol] = target;
        }
    }
}