コード例 #1
0
        /// <summary>
        /// Builds an <c>IntTrieBuilder</c> from <paramref name="setRanges"/>, serializes it to an
        /// <c>IntTrie</c>, and reports through <c>Errln</c> every lookup or enumeration result
        /// that disagrees with <paramref name="checkRanges"/>.
        /// </summary>
        /// <param name="setRanges">Ranges to store (start, limit, value and overwrite flag).</param>
        /// <param name="countSetRanges">Number of leading entries of <paramref name="setRanges"/> to apply.</param>
        /// <param name="checkRanges">
        /// Expected contents. Entry 0 is read unconditionally (its value seeds the builder's initial
        /// and lead-unit values), so the array must not be empty. Each entry's value is expected for
        /// every code point from where the previous entry stopped up to, but excluding, its own limit.
        /// </param>
        /// <param name="countCheckRanges">Number of leading entries of <paramref name="checkRanges"/> to verify.</param>
        /// <param name="latin1Linear">
        /// Passed to the builder; the serialized trie is expected to report the same flag.
        /// </param>
        private void _testTrieRanges(TrieTest.SetRange[] setRanges, int countSetRanges,
                                     TrieTest.CheckRange[] checkRanges, int countCheckRanges, bool latin1Linear)
        {
            // Constructor arguments: alias data array (none), max data length,
            // initial value, lead-unit value, Latin-1-linear flag.
            IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
                                                        checkRanges[0].value_ren, checkRanges[0].value_ren, latin1Linear);

            // set values from setRanges[]
            bool ok = true;

            for (int i = 0; i < countSetRanges; ++i)
            {
                int  setStart  = setRanges[i].start;
                int  setLimit  = setRanges[i].limit;
                int  setValue  = setRanges[i].value_ren;
                bool overwrite = setRanges[i].overwrite;

                // A one-code-point overwriting range goes through SetValue so that
                // both builder entry points get exercised.
                if ((setLimit - setStart) == 1 && overwrite)
                {
                    ok &= newTrie.SetValue(setStart, setValue);
                }
                else
                {
                    ok &= newTrie.SetRange(setStart, setLimit, setValue, overwrite);
                }
            }
            if (!ok)
            {
                Errln("setting values into a trie failed");
                return;
            }

            // verify that all these values are in the new Trie
            int cp = 0;

            for (int i = 0; i < countCheckRanges; ++i)
            {
                int limit    = checkRanges[i].limit;
                int expected = checkRanges[i].value_ren;

                while (cp < limit)
                {
                    int actual = newTrie.GetValue(cp);
                    if (expected != actual)
                    {
                        Errln("newTrie [U+" + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16) + "]==0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(actual, 16)
                              + " instead of 0x" + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                    }
                    ++cp;
                }
            }

            IntTrie trie = newTrie.Serialize(new TrieTest._testFoldedValue(newTrie),
                                             new TrieTest._testFoldingOffset());

            // test linear Latin-1 range from utrie_getData()
            if (latin1Linear)
            {
                cp = 0;
                for (int i = 0; i < countCheckRanges && cp <= 0xff; ++i)
                {
                    int limit    = checkRanges[i].limit;
                    int expected = checkRanges[i].value_ren;

                    while (cp < limit && cp <= 0xff)
                    {
                        int actual = trie.GetLatin1LinearValue((char)cp);
                        if (expected != actual)
                        {
                            Errln("IntTrie.getLatin1LinearValue[U+"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                                  + "]==0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(actual, 16)
                                  + " instead of 0x" + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                        }
                        ++cp;
                    }
                }
            }

            if (latin1Linear != trie.IsLatin1Linear())
            {
                Errln("trie serialization did not preserve " + "Latin-1-linearity");
            }

            // verify that all these values are in the serialized Trie
            cp = 0;
            for (int i = 0; i < countCheckRanges; ++i)
            {
                int limit    = checkRanges[i].limit;
                int expected = checkRanges[i].value_ren;

                if (cp == 0xd800)
                {
                    // skip surrogates: the whole check range that starts at U+D800 is not verified
                    cp = limit;
                    continue;
                }

                while (cp < limit)
                {
                    if (cp <= 0xffff)
                    {
                        int bmpValue = trie.GetBMPValue((char)cp);
                        if (expected != bmpValue)
                        {
                            Errln("serialized trie.getBMPValue(U+"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16) + " == 0x"
                                  + ILOG.J2CsMapping.Util.IlNumber.ToString(bmpValue, 16)
                                  + " instead of 0x" + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                        }
                        // The lead-value lookup is only compared for code units that are
                        // not lead surrogates.
                        if (!IBM.ICU.Text.UTF16.IsLeadSurrogate((char)cp))
                        {
                            int leadValue = trie.GetLeadValue((char)cp);
                            if (expected != leadValue)
                            {
                                Errln("serialized trie.getLeadValue(U+"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16) + " == 0x"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(leadValue, 16)
                                      + " instead of 0x"
                                      + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                            }
                        }
                    }
                    int codePointValue = trie.GetCodePointValue(cp);
                    if (expected != codePointValue)
                    {
                        Errln("serialized trie.getCodePointValue(U+"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16) + ")==0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(codePointValue, 16) + " instead of 0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(expected, 16));
                    }
                    ++cp;
                }
            }

            // enumerate and verify all ranges

            // Enumerated range k is compared against checkRanges[k]; its start must equal
            // the limit of checkRanges[k - 1], hence the index starts at 1.
            int          enumRanges = 1;
            TrieIterator iter       = new TrieTest._testEnumValue(trie);

            RangeValueIterator_Constants.Element result = new RangeValueIterator_Constants.Element();
            while (iter.Next(result))
            {
                // The enumerated value is XORed with 0x5555 before comparison.
                // NOTE(review): presumably this undoes a transform applied by
                // _testEnumValue - confirm against that class.
                int enumValue = result.value_ren ^ 0x5555;

                if (enumRanges >= countCheckRanges)
                {
                    // The iterator produced more ranges than expected. Report it as a
                    // test failure instead of indexing past the expected ranges.
                    Errln("utrie_enum() delivers unexpected extra range [U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.start, 16)
                          + "..U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.limit, 16)
                          + "].0x"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(enumValue, 16));
                    break;
                }

                if (result.start != checkRanges[enumRanges - 1].limit ||
                    result.limit != checkRanges[enumRanges].limit ||
                    enumValue != checkRanges[enumRanges].value_ren)
                {
                    Errln("utrie_enum() delivers wrong range [U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.start, 16)
                          + "..U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(result.limit, 16)
                          + "].0x"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(enumValue, 16)
                          + " instead of [U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(checkRanges[enumRanges - 1].limit, 16)
                          + "..U+"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(checkRanges[enumRanges].limit, 16)
                          + "].0x"
                          + ILOG.J2CsMapping.Util.IlNumber.ToString(checkRanges[enumRanges].value_ren, 16));
                }
                enumRanges++;
            }

            // test linear Latin-1 range: the linear lookup must agree with the lead-value lookup
            if (trie.IsLatin1Linear())
            {
                for (cp = 0; cp < 0x100; ++cp)
                {
                    int linearValue = trie.GetLatin1LinearValue((char)cp);
                    int leadValue   = trie.GetLeadValue((char)cp);
                    if (linearValue != leadValue)
                    {
                        Errln("trie.getLatin1LinearValue[U+"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(cp, 16)
                              + "]=0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(linearValue, 16)
                              + " instead of 0x"
                              + ILOG.J2CsMapping.Util.IlNumber.ToString(leadValue, 16));
                    }
                }
            }

            _testTrieIteration(trie, checkRanges, countCheckRanges);
        }
コード例 #2
0
        /// <summary>
        /// Builds the list of non-overlapping character ranges from the Unicode sets in
        /// <c>fRB.fUSetNodes</c>, assigns a group number to every range, and loads those
        /// numbers into a new <c>IntTrieBuilder</c> stored in <c>fTrie</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. start from one range covering U+0000..U+10FFFF that belongs to no set;
        /// 2. split that list at every boundary of every input set and record, per range,
        ///    which set nodes include it;
        /// 3. give ranges with identical set membership the same group number (numbers
        ///    start at 3; 0 is unused, 1 and 2 are reserved for end/beginning of input);
        /// 4. add the reserved values 1 and 2 to sets containing the strings "eof" / "bof";
        /// 5. store each range's group number in the trie.
        /// Debug dumps are printed when <c>fRB.fDebugEnv</c> contains the matching keyword.
        /// </remarks>
        internal void Build()
        {
            RBBINode usetNode;

            RBBISetBuilder.RangeDescriptor rlRange;

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("usets") >= 0)
            {
                PrintSets();
            }

            // Initialize the process by creating a single range encompassing all
            // characters that is in no sets.
            //
            fRangeList            = new RBBISetBuilder.RangeDescriptor();
            fRangeList.fStartChar = 0;
            fRangeList.fEndChar   = 0x10ffff;

            //
            // Find the set of non-overlapping ranges of characters
            //
            IIterator ni = new ILOG.J2CsMapping.Collections.IteratorAdapter(fRB.fUSetNodes.GetEnumerator());

            while (ni.HasNext())
            {
                usetNode = (RBBINode)ni.Next();

                UnicodeSet inputSet           = usetNode.fInputSet;
                int        inputSetRangeCount = inputSet.GetRangeCount();
                int        inputSetRangeIndex = 0;
                rlRange = fRangeList;

                for (;;)
                {
                    if (inputSetRangeIndex >= inputSetRangeCount)
                    {
                        break;
                    }
                    int inputSetRangeBegin = inputSet
                                             .GetRangeStart(inputSetRangeIndex);
                    int inputSetRangeEnd = inputSet.GetRangeEnd(inputSetRangeIndex);

                    // Skip over ranges from the range list that are completely
                    // below the current range from the input Unicode set.
                    while (rlRange.fEndChar < inputSetRangeBegin)
                    {
                        rlRange = rlRange.fNext;
                    }

                    // If the range-list range starts before the Unicode-set range,
                    // split it in two: one part wholly outside the Unicode set and one
                    // containing the rest. Then restart the loop body; the part before
                    // the set is skipped by the while loop above on the next pass.
                    if (rlRange.fStartChar < inputSetRangeBegin)
                    {
                        rlRange.Split(inputSetRangeBegin);
                        continue;
                    }

                    // Same thing at the end of the ranges: if the range-list range
                    // extends past the end of the Unicode-set range, split it so that
                    // the first part is wholly inside the Unicode set.
                    if (rlRange.fEndChar > inputSetRangeEnd)
                    {
                        rlRange.Split(inputSetRangeEnd + 1);
                    }

                    // The current rlRange is now entirely within the UnicodeSet range.
                    // Add this Unicode set to the list of sets for this rlRange
                    // (once only).
                    if (rlRange.fIncludesSets.IndexOf(usetNode) == -1)
                    {
                        ILOG.J2CsMapping.Collections.Generics.Collections.Add(rlRange.fIncludesSets, usetNode);
                    }

                    // Advance over ranges that we are finished with.
                    if (inputSetRangeEnd == rlRange.fEndChar)
                    {
                        inputSetRangeIndex++;
                    }
                    rlRange = rlRange.fNext;
                }
            }

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("range") >= 0)
            {
                PrintRanges();
            }

            //
            // Group the above ranges, with each group consisting of one or more
            // ranges that are in exactly the same set of original UnicodeSets.
            // The groups are numbered, and these group numbers are the set of
            // input symbols recognized by the run-time state machine.
            //
            // Numbering: # 0 (state table column 0) is unused.
            // # 1 is reserved - table column 1 is for end-of-input
            // # 2 is reserved - table column 2 is for beginning-in-input
            // # 3 is the first range list.
            //
            RBBISetBuilder.RangeDescriptor rlSearchRange;
            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                for (rlSearchRange = fRangeList; rlSearchRange != rlRange; rlSearchRange = rlSearchRange.fNext)
                {
                    // Compare the set lists by content. Calling Equals on a .NET list
                    // compares references unless the type overrides it, so two distinct
                    // ranges would never be recognized as belonging to the same group.
                    // NOTE(review): the declared type of fIncludesSets is not visible
                    // here; the element-wise comparison is correct either way.
                    if (IncludesSameSets(rlRange.fIncludesSets, rlSearchRange.fIncludesSets))
                    {
                        rlRange.fNum = rlSearchRange.fNum;
                        break;
                    }
                }
                if (rlRange.fNum == 0)
                {
                    // No earlier range has the same membership: open a new group.
                    fGroupCount++;
                    rlRange.fNum = fGroupCount + 2;
                    rlRange.SetDictionaryFlag();
                    AddValToSets(rlRange.fIncludesSets, fGroupCount + 2);
                }
            }

            // Handle input sets that contain the special string {eof}.
            // Column 1 of the state table is reserved for EOF on input.
            // Column 2 is reserved for before-the-start-input.
            // (This column can be optimized away later if there are no rule
            // references to {bof}.)
            // Add this column value (1 or 2) to the equivalent expression
            // subtree for each UnicodeSet that contains the string {eof} or {bof}.
            // Because {bof} and {eof} are not characters in the normal sense,
            // they do not affect the computation of ranges or the trie.

            String eofString = "eof";
            String bofString = "bof";

            ni = new ILOG.J2CsMapping.Collections.IteratorAdapter(fRB.fUSetNodes.GetEnumerator());
            while (ni.HasNext())
            {
                usetNode = (RBBINode)ni.Next();
                UnicodeSet inputSet_0 = usetNode.fInputSet;
                if (inputSet_0.Contains(eofString))
                {
                    AddValToSet(usetNode, 1);
                }
                if (inputSet_0.Contains(bofString))
                {
                    AddValToSet(usetNode, 2);
                    fSawBOF = true;
                }
            }

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("rgroup") >= 0)
            {
                PrintRangeGroups();
            }
            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("esets") >= 0)
            {
                PrintSets();
            }

            // IntTrieBuilder(int aliasdata[], int maxdatalength,
            // int initialvalue, int leadunitvalue,
            // boolean latin1linear)

            fTrie = new IntTrieBuilder(null,   // Data array (utrie will allocate one)
                                       100000, // Max Data Length
                                       0,      // Initial value for all code points
                                       0,      // Lead Surrogate unit value,
                                       true);  // Keep Latin 1 in separately.

            // SetRange takes an exclusive limit, hence fEndChar + 1.
            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                fTrie.SetRange(rlRange.fStartChar, rlRange.fEndChar + 1,
                               rlRange.fNum, true);
            }
        }

        /// <summary>
        /// Returns true when both sequences contain equal elements in the same order
        /// (the semantics of Java's <c>List.equals</c>). Two null references compare
        /// equal; a null and a non-null reference do not.
        /// </summary>
        private static bool IncludesSameSets(System.Collections.IEnumerable left,
                                             System.Collections.IEnumerable right)
        {
            if (Object.ReferenceEquals(left, right))
            {
                return true;
            }
            if (left == null || right == null)
            {
                return false;
            }

            System.Collections.IEnumerator rightItems = right.GetEnumerator();
            try
            {
                foreach (object leftItem in left)
                {
                    if (!rightItems.MoveNext() || !Object.Equals(leftItem, rightItems.Current))
                    {
                        return false;
                    }
                }
                // Equal only if the right-hand sequence is exhausted as well.
                return !rightItems.MoveNext();
            }
            finally
            {
                System.IDisposable disposable = rightItems as System.IDisposable;
                if (disposable != null)
                {
                    disposable.Dispose();
                }
            }
        }