/// <summary>
/// Positions the iteration window on one range of the underlying set.
/// </summary>
/// <param name="aRange">Index of the range whose bounds are read from <c>set</c>.</param>
protected virtual void LoadRange(int aRange)
{
    // The start is stored before the end is fetched; keep this order so a
    // failure while fetching the end leaves the same state as before.
    nextElement = set.GetRangeStart(aRange);
    endElement = set.GetRangeEnd(aRange);
}
/// <summary>
/// Builds the list of non-overlapping character ranges from the Unicode sets,
/// groups ranges that belong to exactly the same sets, and records each
/// range's group number in the trie.
/// </summary>
/// <remarks>
/// Debug output is emitted when <c>fRB.fDebugEnv</c> contains one of the
/// keywords "usets", "range", "rgroup" or "esets".
/// </remarks>
internal virtual void Build()
{
    if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("usets", StringComparison.Ordinal) >= 0)
    {
        PrintSets();
    }

    // Seed the list with a single range that covers every character and is
    // a member of no set.
    fRangeList = new RangeDescriptor
    {
        fStartChar = 0,
        fEndChar = 0x10ffff
    };

    //
    // Carve the range list up so that no range straddles a set boundary.
    //
    foreach (RBBINode usetNode in fRB.fUSetNodes)
    {
        UnicodeSet inputSet = usetNode.fInputSet;
        int setRangeCount = inputSet.RangeCount;
        int setRangeIndex = 0;
        RangeDescriptor current = fRangeList;

        while (setRangeIndex < setRangeCount)
        {
            int setRangeFirst = inputSet.GetRangeStart(setRangeIndex);
            int setRangeLast = inputSet.GetRangeEnd(setRangeIndex);

            // Step past list ranges that lie wholly below the current
            // range of the input set.
            while (current.fEndChar < setRangeFirst)
            {
                current = current.fNext;
            }

            // The list range begins before the set range: split it so the
            // leading part is wholly outside the set, then go around again.
            // The skip loop above will then move past that leading part.
            if (current.fStartChar < setRangeFirst)
            {
                current.Split(setRangeFirst);
                continue;
            }

            // The list range runs past the end of the set range: split it
            // so the first part is wholly inside the set.
            if (current.fEndChar > setRangeLast)
            {
                current.Split(setRangeLast + 1);
            }

            // 'current' now lies entirely within the set range; record the
            // set against it unless it is already recorded.
            if (current.fIncludesSets.IndexOf(usetNode) < 0)
            {
                current.fIncludesSets.Add(usetNode);
            }

            // Move to the next set range once this one is fully covered.
            if (setRangeLast == current.fEndChar)
            {
                setRangeIndex++;
            }

            current = current.fNext;
        }
    }

    if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("range", StringComparison.Ordinal) >= 0)
    {
        PrintRanges();
    }

    //
    // Group the ranges: every group holds the ranges that are in exactly the
    // same set of original UnicodeSets. The group numbers are the input
    // symbols recognized by the run-time state machine.
    //
    // Numbering: # 0 (state table column 0) is unused.
    //            # 1 is reserved - table column 1 is for end-of-input
    //            # 2 is reserved - table column 2 is for beginning-in-input
    //            # 3 is the first range list.
    //
    for (RangeDescriptor range = fRangeList; range != null; range = range.fNext)
    {
        // Look through the ranges that precede this one for an identical
        // set membership and reuse that range's number.
        for (RangeDescriptor earlier = fRangeList; earlier != range; earlier = earlier.fNext)
        {
            if (ListEqualityComparer<RBBINode>.Default.Equals(range.fIncludesSets, earlier.fIncludesSets))
            {
                range.fNum = earlier.fNum;
                break;
            }
        }

        // Still unnumbered: this range starts a new group.
        if (range.fNum == 0)
        {
            fGroupCount++;
            int groupNumber = fGroupCount + 2;
            range.fNum = groupNumber;
            range.SetDictionaryFlag();
            AddValToSets(range.fIncludesSets, groupNumber);
        }
    }

    // Handle input sets that contain the special strings {eof} and {bof}.
    //   Column 1 of the state table is reserved for EOF on input.
    //   Column 2 is reserved for before-the-start-of-input.
    //   (This column can be optimized away later if there are no rule
    //   references to {bof}.)
    // The column value (1 or 2) is added to the equivalent expression subtree
    // of each UnicodeSet that contains the corresponding string. Because
    // {bof} and {eof} are not characters in the normal sense, they do not
    // affect the computation of ranges or the trie.
    string eofMarker = "eof";
    string bofMarker = "bof";
    foreach (RBBINode usetNode in fRB.fUSetNodes)
    {
        UnicodeSet inputSet = usetNode.fInputSet;

        if (inputSet.Contains(eofMarker))
        {
            AddValToSet(usetNode, 1);
        }

        if (inputSet.Contains(bofMarker))
        {
            AddValToSet(usetNode, 2);
            fSawBOF = true;
        }
    }

    if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("rgroup", StringComparison.Ordinal) >= 0)
    {
        PrintRangeGroups();
    }

    if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("esets", StringComparison.Ordinal) >= 0)
    {
        PrintSets();
    }

    // First argument: initial value for all code points.
    // Second argument: error value for out-of-range input.
    fTrie = new Trie2Writable(0, 0);

    for (RangeDescriptor range = fRangeList; range != null; range = range.fNext)
    {
        // Arguments: range start, range end (inclusive), value for the range,
        // and 'true' to overwrite previously written values.
        fTrie.SetRange(range.fStartChar, range.fEndChar, range.fNum, true);
    }
}
/// <summary>
/// Reads the bounds of the range at index <paramref name="aRange"/> from
/// <c>set</c> into <c>nextElement</c> and <c>endElement</c>.
/// </summary>
/// <param name="aRange">Index of the range to load.</param>
/// <remarks>
/// ICU4N specific - marked internal instead of protected, since the
/// functionality is obsolete.
/// </remarks>
internal virtual void LoadRange(int aRange)
{
    // Start first, then end, as two separate assignments.
    nextElement = set.GetRangeStart(aRange);
    endElement = set.GetRangeEnd(aRange);
}