コード例 #1
0
ファイル: UnicodeSetIterator.cs プロジェクト: SilentCC/ICU4N
 protected virtual void LoadRange(int aRange)
 {
     nextElement = set.GetRangeStart(aRange);
     endElement  = set.GetRangeEnd(aRange);
 }
コード例 #2
0
        //------------------------------------------------------------------------
        //
        //           build          Build the list of non-overlapping character ranges
        //                          from the Unicode Sets.
        //
        //------------------------------------------------------------------------
        internal virtual void Build()
        {
            RangeDescriptor rlRange;

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("usets", StringComparison.Ordinal) >= 0)
            {
                PrintSets();
            }

            //  Initialize the process by creating a single range encompassing all characters
            //  that is in no sets.
            //
            fRangeList            = new RangeDescriptor();
            fRangeList.fStartChar = 0;
            fRangeList.fEndChar   = 0x10ffff;

            //
            //  Find the set of non-overlapping ranges of characters
            //
            foreach (RBBINode usetNode in fRB.fUSetNodes)
            {
                UnicodeSet inputSet           = usetNode.fInputSet;
                int        inputSetRangeCount = inputSet.RangeCount;
                int        inputSetRangeIndex = 0;
                rlRange = fRangeList;

                for (; ;)
                {
                    if (inputSetRangeIndex >= inputSetRangeCount)
                    {
                        break;
                    }
                    int inputSetRangeBegin = inputSet.GetRangeStart(inputSetRangeIndex);
                    int inputSetRangeEnd   = inputSet.GetRangeEnd(inputSetRangeIndex);

                    // skip over ranges from the range list that are completely
                    //   below the current range from the input unicode set.
                    while (rlRange.fEndChar < inputSetRangeBegin)
                    {
                        rlRange = rlRange.fNext;
                    }

                    // If the start of the range from the range list is before with
                    //   the start of the range from the unicode set, split the range list range
                    //   in two, with one part being before (wholly outside of) the unicode set
                    //   and the other containing the rest.
                    //   Then continue the loop; the post-split current range will then be skipped
                    //     over
                    if (rlRange.fStartChar < inputSetRangeBegin)
                    {
                        rlRange.Split(inputSetRangeBegin);
                        continue;
                    }

                    // Same thing at the end of the ranges...
                    // If the end of the range from the range list doesn't coincide with
                    //   the end of the range from the unicode set, split the range list
                    //   range in two.  The first part of the split range will be
                    //   wholly inside the Unicode set.
                    if (rlRange.fEndChar > inputSetRangeEnd)
                    {
                        rlRange.Split(inputSetRangeEnd + 1);
                    }

                    // The current rlRange is now entirely within the UnicodeSet range.
                    // Add this unicode set to the list of sets for this rlRange
                    if (rlRange.fIncludesSets.IndexOf(usetNode) == -1)
                    {
                        rlRange.fIncludesSets.Add(usetNode);
                    }

                    // Advance over ranges that we are finished with.
                    if (inputSetRangeEnd == rlRange.fEndChar)
                    {
                        inputSetRangeIndex++;
                    }
                    rlRange = rlRange.fNext;
                }
            }

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("range", StringComparison.Ordinal) >= 0)
            {
                PrintRanges();
            }

            //
            //  Group the above ranges, with each group consisting of one or more
            //    ranges that are in exactly the same set of original UnicodeSets.
            //    The groups are numbered, and these group numbers are the set of
            //    input symbols recognized by the run-time state machine.
            //
            //    Numbering: # 0  (state table column 0) is unused.
            //               # 1  is reserved - table column 1 is for end-of-input
            //               # 2  is reserved - table column 2 is for beginning-in-input
            //               # 3  is the first range list.
            //
            RangeDescriptor rlSearchRange;

            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                for (rlSearchRange = fRangeList; rlSearchRange != rlRange; rlSearchRange = rlSearchRange.fNext)
                {
                    if (ListEqualityComparer <RBBINode> .Default.Equals(rlRange.fIncludesSets, rlSearchRange.fIncludesSets))
                    {
                        rlRange.fNum = rlSearchRange.fNum;
                        break;
                    }
                }
                if (rlRange.fNum == 0)
                {
                    fGroupCount++;
                    rlRange.fNum = fGroupCount + 2;
                    rlRange.SetDictionaryFlag();
                    AddValToSets(rlRange.fIncludesSets, fGroupCount + 2);
                }
            }

            // Handle input sets that contain the special string {eof}.
            //   Column 1 of the state table is reserved for EOF on input.
            //   Column 2 is reserved for before-the-start-input.
            //            (This column can be optimized away later if there are no rule
            //             references to {bof}.)
            //   Add this column value (1 or 2) to the equivalent expression
            //     subtree for each UnicodeSet that contains the string {eof}
            //   Because {bof} and {eof} are not a characters in the normal sense,
            //   they doesn't affect the computation of ranges or TRIE.

            string eofString = "eof";
            string bofString = "bof";

            foreach (RBBINode usetNode in fRB.fUSetNodes)
            {
                UnicodeSet inputSet = usetNode.fInputSet;
                if (inputSet.Contains(eofString))
                {
                    AddValToSet(usetNode, 1);
                }
                if (inputSet.Contains(bofString))
                {
                    AddValToSet(usetNode, 2);
                    fSawBOF = true;
                }
            }


            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("rgroup", StringComparison.Ordinal) >= 0)
            {
                PrintRangeGroups();
            }
            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("esets", StringComparison.Ordinal) >= 0)
            {
                PrintSets();
            }

            fTrie = new Trie2Writable(0,       //   Initial value for all code points.
                                      0);      //   Error value for out-of-range input.

            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                fTrie.SetRange(
                    rlRange.fStartChar,         // Range start
                    rlRange.fEndChar,           // Range end (inclusive)
                    rlRange.fNum,               // value for range
                    true                        // Overwrite previously written values
                    );
            }
        }
コード例 #3
0
ファイル: UnicodeSetIterator.cs プロジェクト: introfog/ICU4N
 internal virtual void LoadRange(int aRange) // ICU4N specific - marked internal instead of protected, since the functionality is obsolete
 {
     nextElement = set.GetRangeStart(aRange);
     endElement  = set.GetRangeEnd(aRange);
 }