Esempio n. 1
0
        /// <summary>
        /// Walks every character produced by a <see cref="ComposedCharIter"/> and reports an
        /// error when the decomposition supplied by the iterator is identical to the character
        /// itself or differs from what <c>Normalizer.Decompose</c> returns for that character.
        /// The gaps between consecutive iterator results are handed to <c>assertNoDecomp</c>.
        /// </summary>
        /// <param name="compat">
        /// Passed through unchanged to the iterator constructor, to
        /// <c>Normalizer.Decompose</c> and to <c>assertNoDecomp</c>.
        /// </param>
        private void doTestComposedChars(bool compat)
        {
            int options = Normalizer.IGNORE_HANGUL;
            ComposedCharIter composed = new ComposedCharIter(compat, options);

            char previous = '\0';

            while (composed.HasNext)
            {
                char current = composed.Next();

                // The iterator skipped everything between the previous result and this
                // one, so none of those characters should have a decomposition.
                assertNoDecomp(previous, current, compat, options);
                previous = current;

                // Compare the iterator's decomposition against the normalizer's.
                String single         = current.ToString();
                String fromIterator   = composed.Decomposition();
                String fromNormalizer = Normalizer.Decompose(single, compat);

                if (fromIterator.Equals(single))
                {
                    // A character returned by the iterator must decompose to something else.
                    Errln("ERROR: " + Hex(current) + " has identical decomp");
                    continue;
                }

                if (fromIterator.Equals(fromNormalizer))
                {
                    continue;
                }

                string mismatch = "ERROR: Normalizer decomp for " + Hex(current) + " (" + Hex(fromNormalizer) + ")"
                                  + " != iter decomp (" + Hex(fromIterator) + ")";
                Errln(mismatch);
            }

            // Cover the tail of the range after the last character the iterator returned.
            assertNoDecomp(previous, '\uFFFF', compat, options);
        }
Esempio n. 2
0
        /// <summary>
        /// For each character returned by a canonical (non-compat) <see cref="ComposedCharIter"/>,
        /// composes the iterator's decomposition with <c>Normalizer.Compose</c> and reports an
        /// error when the result is not the original character. Characters carrying the
        /// <c>Full_Composition_Exclusion</c> property, and characters whose decomposition is
        /// four UTF-16 code units long, are skipped.
        /// </summary>
        public void TestRoundTrip()
        {
            bool compat  = false;
            int  options = Normalizer.IGNORE_HANGUL;

            ComposedCharIter composed = new ComposedCharIter(compat, options);

            while (composed.HasNext)
            {
                char current = composed.Next();

                string original   = "" + current;
                string decomposed = composed.Decomposition();
                string recomposed = Normalizer.Compose(decomposed, compat);

                // Composition exclusions are logged and skipped rather than checked.
                bool excluded = UChar.HasBinaryProperty(current, UProperty.Full_Composition_Exclusion);
                if (excluded)
                {
                    Logln("Skipped excluded char " + Hex(current) + " (" + UChar.GetName(current) + ")");
                    continue;
                }

                // Four-unit decompositions are skipped silently (the original comment
                // describes these as "disparaged characters").
                if (decomposed.Length == 4)
                {
                    continue;
                }

                if (recomposed.Equals(original))
                {
                    continue;
                }

                Errln("ERROR: Round trip invalid: " + Hex(original) + " --> " + Hex(decomposed)
                      + " --> " + Hex(recomposed));

                Errln("  char decomp is '" + decomposed + "'");
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Adds entries for pre-composed Unicode characters so that this collator
        /// can be used reasonably well with decomposition turned off.
        /// </summary>
        /// <remarks>
        /// Only pre-composed characters for which <c>GetCharOrder</c> returns
        /// <c>RBCollationTables.UNMAPPED</c> are processed. For each of them the
        /// decomposition is examined in this order:
        /// a single-unit decomposition reuses that character's order if it has one;
        /// a two-unit decomposition starting with a high surrogate reuses the order of
        /// the code point at index 0 if it has one;
        /// otherwise the contracting order of the whole decomposition is used if present;
        /// otherwise, when every unit of the decomposition has an order, an expanding
        /// entry is added.
        /// </remarks>
        // Converted from Java. The original signature was
        // "private void addComposedChars() throws ParseException"; .NET has no 'throws' clauses.
        private void AddComposedChars()
        {
            // Visit every pre-composed character the iterator knows about.
            ComposedCharIter composed = new ComposedCharIter();

            for (int c = composed.next(); c != ComposedCharIter.DONE; c = composed.next())
            {
                // Characters that already have an ordering need nothing from us.
                if (GetCharOrder(c) != RBCollationTables.UNMAPPED)
                {
                    continue;
                }

                // TODO: Ideally we would find the longest initial substring of the
                // decomposition that exists in the tables as a contracting sequence,
                // take its ordering, and repeat on the remaining characters to build a
                // list of orderings for the expansion table. That would be more correct
                // but significantly slower, so it may not be worth doing.
                String decomp = composed.decomposition();
                int length = decomp.Length();

                // sherman/Note: for a one-character decomposition it is enough to check
                // whether that character has an entry in the order table (it does not
                // have to be a contraction order) and reuse it for the pre-composed
                // character. The previous implementation needlessly added a
                // single-character expansion entry here.
                if (length == 1)
                {
                    int singleOrder = GetCharOrder(decomp.CharAt(0));
                    if (singleOrder != RBCollationTables.UNMAPPED)
                    {
                        AddOrder(c, singleOrder);
                    }
                    continue;
                }

                if (length == 2)
                {
                    // Two units that start with a high surrogate are looked up as the
                    // single code point read at index 0.
                    char lead = decomp.CharAt(0);
                    if (char.IsHighSurrogate(lead))
                    {
                        int supplementaryOrder = GetCharOrder(decomp.CodePointAt(0));
                        if (supplementaryOrder != RBCollationTables.UNMAPPED)
                        {
                            AddOrder(c, supplementaryOrder);
                        }
                        continue;
                    }
                }

                // If the whole decomposition is already in the tables as one contracting
                // string, map the pre-composed character straight to that order.
                int wholeOrder = GetContractOrder(decomp);
                if (wholeOrder != RBCollationTables.UNMAPPED)
                {
                    AddOrder(c, wholeOrder);
                    continue;
                }

                // No contracting order for the full string. If every individual unit
                // has an order, an expanding entry can stand in for the pre-composed
                // character; stop scanning at the first unit without one.
                int index = 0;
                while (index < length
                       && GetCharOrder(decomp.CharAt(index)) != RBCollationTables.UNMAPPED)
                {
                    index++;
                }

                if (index == length)
                {
                    AddExpandOrder(c, decomp, RBCollationTables.UNMAPPED);
                }
            }
        }