예제 #1
0
 /// <summary>
 /// Checks that two strings that are canonically equivalent test equal under a
 /// canonical caseless match, reporting a failure through <c>Errln</c> otherwise.
 /// See UAX #21 Case Mappings, Jitterbug 2021 and Unicode Technical Committee
 /// meeting consensus 92-C31.
 /// </summary>
 /// <remarks>
 /// Which <c>Normalizer.Compare</c> overload is exercised depends on the lengths
 /// (in UTF-16 code units) of the two inputs: (int, int) when both have length 1,
 /// (int, String) when only <paramref name="s1"/> has length 1 and
 /// <paramref name="s2"/> is longer, and (char[], char[]) when both are longer
 /// than 1. Any other combination of lengths is not checked.
 /// </remarks>
 /// <param name="s1">First string of the pair.</param>
 /// <param name="s2">Second string of the pair.</param>
 private void compare(String s1, String s2)
 {
     // The length cases are mutually exclusive, so at most one comparison runs
     // and at most one failure message is produced per call.
     String failurePrefix = null;

     if (s1.Length == 1)
     {
         if (s2.Length == 1)
         {
             if (Normalizer.Compare(UTF16.CharAt(s1, 0), UTF16.CharAt(s2, 0), Normalizer.COMPARE_IGNORE_CASE) != 0)
             {
                 failurePrefix = "Normalizer.compare(int,int) failed for s1: ";
             }
         }
         else if (s2.Length > 1)
         {
             if (Normalizer.Compare(UTF16.CharAt(s1, 0), s2, Normalizer.COMPARE_IGNORE_CASE) != 0)
             {
                 failurePrefix = "Normalizer.compare(int,String) failed for s1: ";
             }
         }
     }
     else if (s1.Length > 1 && s2.Length > 1)
     {
         // TODO (carried over from the original): "Re-enable this test after UTC fixes UAX 21".
         // Note that the check below is currently active.
         if (Normalizer.Compare(s1.ToCharArray(), s2.ToCharArray(), Normalizer.COMPARE_IGNORE_CASE) != 0)
         {
             failurePrefix = "Normalizer.compare(char[],char[]) failed for s1: ";
         }
     }

     if (failurePrefix != null)
     {
         Errln(failurePrefix + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
     }
 }
예제 #2
0
        /// <summary>
        /// We have a segment, in NFD. Find all the strings that are canonically equivalent to it.
        /// </summary>
        /// <remarks>
        /// Every string produced by <c>GetEquivalents2</c> is permuted, and only those
        /// permutations that <c>Normalizer.Compare</c> (options 0) reports as equal to
        /// <paramref name="segment"/> are kept.
        /// </remarks>
        /// <param name="segment">The segment to expand.</param>
        /// <returns>A newly allocated array holding the accepted permutations.</returns>
        private string[] GetEquivalents(string segment)
        {
            ISet<string> accepted = new HashSet<string>();
            ISet<string> baseForms = GetEquivalents2(segment);
            ISet<string> scratch = new HashSet<string>(); // reused for each base form

            // Now get all the permutations and add only the ones that are canonically equivalent.
            // TODO: optimize by not permuting any class zero.
            foreach (string baseForm in baseForms)
            {
                scratch.Clear();
#pragma warning disable 612, 618
                Permute(baseForm, SKIP_ZEROS, scratch);
#pragma warning restore 612, 618

                foreach (string candidate in scratch)
                {
                    bool equivalent = Normalizer.Compare(candidate, segment, 0) == 0;

                    if (PROGRESS)
                    {
                        Console.Out.WriteLine(
                            (equivalent ? "Adding Permutation: " : "-Skipping Permutation: ")
                            + Utility.Hex(candidate));
                    }

                    if (equivalent)
                    {
                        accepted.Add(candidate);
                    }
                }
            }

            // Convert into a string[] to clean up storage.
            string[] equivalents = new string[accepted.Count];
            accepted.CopyTo(equivalents, 0);
            return equivalents;
        }
예제 #3
0
        /// <summary>
        /// See if the decomposition of <paramref name="comp"/> is at <paramref name="segment"/>
        /// starting at <paramref name="segmentPos"/> (with canonical rearrangement!).
        /// If so, take the remainder, and return the equivalents.
        /// </summary>
        /// <param name="comp">
        /// Code point whose decomposition is searched for. When <c>nfcImpl</c> returns no
        /// decomposition for it, the code point itself is used instead.
        /// </param>
        /// <param name="segment">The string that is scanned.</param>
        /// <param name="segmentPos">Index in <paramref name="segment"/> at which the scan starts.</param>
        /// <param name="buf">
        /// Working buffer shared among callees. It is emptied here and then receives the code
        /// points of the segment that were not consumed by the match.
        /// </param>
        /// <returns>
        /// <c>null</c> when the decomposition is not completely matched, or when
        /// <paramref name="comp"/> followed by the remainder does not compare equal
        /// (<c>Normalizer.Compare</c>, options 0) to the scanned part of the segment;
        /// <c>SET_WITH_NULL_STRING</c> when the match leaves no remainder; otherwise the
        /// result of <c>GetEquivalents2</c> for the remainder.
        /// </returns>
        private ISet<string> Extract(int comp, string segment, int segmentPos, StringBuffer buf)
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine(" extract: " + Utility.Hex(UTF16.ValueOf(comp))
                                      + ", " + Utility.Hex(segment.Substring(segmentPos)));
            }

            // Fall back to the code point itself when there is no decomposition.
            string decomposition = nfcImpl.GetDecomposition(comp) ?? UTF16.ValueOf(comp);

            // expectedCp is the next decomposition code point we are waiting for;
            // nextDecompIndex already points past it.
            int expectedCp = UTF16.CharAt(decomposition, 0);
            int nextDecompIndex = UTF16.GetCharCount(expectedCp);

            buf.Length = 0; // initialize working buffer, shared among callees

            // See if the decomposition matches the start of segment (at segmentPos).
            bool matchedAll = false;
            int index = segmentPos;
            while (index < segment.Length)
            {
                int current = UTF16.CharAt(segment, index);

                if (current != expectedCp)
                {
                    if (PROGRESS)
                    {
                        Console.Out.WriteLine("  buffer: " + Utility.Hex(UTF16.ValueOf(current)));
                    }

                    // Brute force approach: keep every unmatched code point for the remainder.
                    // TODO: optimize. Since the combining classes are monotonically increasing
                    // after zero (e.g. 0 5 7 9 0 3), the scan could fail early whenever the
                    // decomposition class is less than or equal to the segment class.
                    UTF16.Append(buf, current);
                }
                else
                {
                    // Equal, so eat another code point from the decomposition.
                    if (PROGRESS)
                    {
                        Console.Out.WriteLine("  matches: " + Utility.Hex(UTF16.ValueOf(current)));
                    }

                    if (nextDecompIndex == decomposition.Length)
                    {
                        // Done, have all decomposition characters: add remaining segment chars.
                        buf.Append(segment.Substring(index + UTF16.GetCharCount(current)));
                        matchedAll = true;
                        break;
                    }

                    expectedCp = UTF16.CharAt(decomposition, nextDecompIndex);
                    nextDecompIndex += UTF16.GetCharCount(expectedCp);
                }

                index += UTF16.GetCharCount(current);
            }

            if (!matchedAll)
            {
                return null; // we failed, characters left over
            }

            if (PROGRESS)
            {
                Console.Out.WriteLine("Matches");
            }

            if (buf.Length == 0)
            {
                return SET_WITH_NULL_STRING; // succeed, but no remainder
            }

            string remainder = buf.ToString();

            // Brute force approach to make sure the result is canonically equivalent:
            // comp followed by the remainder must compare equal to the scanned part of the segment.
            string recomposed = UTF16.ValueOf(comp) + remainder;
            string scanned = segment.Substring(segmentPos);
            if (Normalizer.Compare(recomposed, scanned, 0) != 0)
            {
                return null;
            }

            // Get the remaining combinations.
            return GetEquivalents2(remainder);
        }
예제 #4
0
        /// <summary>
        /// Verify the conformance of the given line of the Unicode
        /// normalization (UTR 15) test suite file. For each line,
        /// there are five columns, corresponding to field[0]..field[4].
        /// </summary>
        /// <remarks>
        /// The following invariants must be true for all conformant implementations:
        /// <code>
        ///  c2 == NFC(c1) == NFC(c2) == NFC(c3)
        ///  c3 == NFD(c1) == NFD(c2) == NFD(c3)
        ///  c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
        ///  c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
        /// </code>
        /// In addition to these invariants the method exercises the quick-check,
        /// is-normalized, FCD and caseless-compare APIs on the same columns.
        /// </remarks>
        /// <param name="field">The 5 columns.</param>
        /// <param name="line">The source line from the test suite file (used in the failure message).</param>
        /// <param name="options">
        /// Option bits handed through to the normalization, quick-check and is-normalized
        /// calls, and shifted by <c>Normalizer.COMPARE_NORM_OPTIONS_SHIFT</c> for the final compare.
        /// </param>
        /// <returns>true if the test passes.</returns>
        private bool checkConformance(String[] field, String line, int options)
        {
            bool         pass = true;
            StringBuffer buf = new StringBuffer(); // scratch
            String       @out, fcd;

            for (int i = 0; i < 5; ++i)
            {
                int fieldNum = i + 1;
                if (i < 3)
                {
                    // Only c1..c3 must normalize to the NFC/NFD columns.
                    pass &= checkNorm(Normalizer.NFC, options, field[i], field[1], fieldNum);
                    pass &= checkNorm(Normalizer.NFD, options, field[i], field[2], fieldNum);
                }
                pass &= checkNorm(Normalizer.NFKC, options, field[i], field[3], fieldNum);
                pass &= checkNorm(Normalizer.NFKD, options, field[i], field[4], fieldNum);
                cross(field[4] /*NFKD String*/, field[3] /*NFKC String*/, Normalizer.NFKC);
                cross(field[3] /*NFKC String*/, field[4] /*NFKD String*/, Normalizer.NFKD);
            }
            compare(field[1], field[2]);
            compare(field[0], field[1]);
            compare(field[0], field[2]);
            // test quick checks
            if (NormalizerQuickCheckResult.No == Normalizer.QuickCheck(field[1], Normalizer.NFC, options))
            {
                Errln("Normalizer error: quickCheck(NFC(s), Normalizer.NFC) is Normalizer.NO");
                pass = false;
            }
            if (Normalizer.NO == Normalizer.QuickCheck(field[2], Normalizer.NFD, options))
            {
                Errln("Normalizer error: quickCheck(NFD(s), Normalizer.NFD) is Normalizer.NO");
                pass = false;
            }
            if (Normalizer.NO == Normalizer.QuickCheck(field[3], Normalizer.NFKC, options))
            {
                Errln("Normalizer error: quickCheck(NFKC(s), Normalizer.NFKC) is Normalizer.NO");
                pass = false;
            }
            if (Normalizer.NO == Normalizer.QuickCheck(field[4], Normalizer.NFKD, options))
            {
                Errln("Normalizer error: quickCheck(NFKD(s), Normalizer.NFKD) is Normalizer.NO");
                pass = false;
            }

            // test isNormalized: the normalized column must be reported as normalized,
            // and the source column must not be unless it is identical to that column
            if (!Normalizer.IsNormalized(field[1], Normalizer.NFC, options))
            {
                Errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
                pass = false;
            }
            if (!field[0].Equals(field[1]) && Normalizer.IsNormalized(field[0], Normalizer.NFC, options))
            {
                Errln("Normalizer error: isNormalized(s, Normalizer.NFC) is TRUE");
                pass = false;
            }
            if (!Normalizer.IsNormalized(field[3], Normalizer.NFKC, options))
            {
                Errln("Normalizer error: isNormalized(NFKC(s), Normalizer.NFKC) is false");
                pass = false;
            }
            if (!field[0].Equals(field[3]) && Normalizer.IsNormalized(field[0], Normalizer.NFKC, options))
            {
                Errln("Normalizer error: isNormalized(s, Normalizer.NFKC) is TRUE");
                pass = false;
            }
            // test api that takes a char[]
            if (!Normalizer.IsNormalized(field[1].ToCharArray(), 0, field[1].Length, Normalizer.NFC, options))
            {
                Errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
                pass = false;
            }
            // test api that takes a codepoint
            if (!Normalizer.IsNormalized(UTF16.CharAt(field[1], 0), Normalizer.NFC, options))
            {
                Errln("Normalizer error: isNormalized(NFC(s), Normalizer.NFC) is false");
                pass = false;
            }
            // test FCD quick check and "makeFCD"
            fcd = Normalizer.Normalize(field[0], Normalizer.FCD);
            if (Normalizer.NO == Normalizer.QuickCheck(fcd, Normalizer.FCD, options))
            {
                Errln("Normalizer error: quickCheck(FCD(s), Normalizer.FCD) is Normalizer.NO");
                pass = false;
            }
            // check FCD return length
            // (a mismatch is reported through Errln but does not clear 'pass')
            {
                char[] fcd2   = new char[fcd.Length * 2];
                char[] src    = field[0].ToCharArray();
                int    fcdLen = Normalizer.Normalize(src, 0, src.Length, fcd2, fcd.Length, fcd2.Length, Normalizer.FCD, 0);
                if (fcdLen != fcd.Length)
                {
                    Errln("makeFCD did not return the correct length");
                }
            }
            if (Normalizer.NO == Normalizer.QuickCheck(fcd, Normalizer.FCD, options))
            {
                Errln("Normalizer error: quickCheck(FCD(s), Normalizer.FCD) is Normalizer.NO");
                pass = false;
            }
            if (Normalizer.NO == Normalizer.QuickCheck(field[2], Normalizer.FCD, options))
            {
                Errln("Normalizer error: quickCheck(NFD(s), Normalizer.FCD) is Normalizer.NO");
                pass = false;
            }

            if (Normalizer.NO == Normalizer.QuickCheck(field[4], Normalizer.FCD, options))
            {
                Errln("Normalizer error: quickCheck(NFKD(s), Normalizer.FCD) is Normalizer.NO");
                pass = false;
            }

            // Run the iterative FCD normalization forwards (+1) and backwards (-1) over the
            // source, NFD and NFKD columns; the returned strings are not inspected here.
            @out = iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, +1, options);
            @out = iterativeNorm(new StringCharacterIterator(field[0]), Normalizer.FCD, buf, -1, options);

            @out = iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, +1, options);
            @out = iterativeNorm(new StringCharacterIterator(field[2]), Normalizer.FCD, buf, -1, options);

            @out = iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, +1, options);
            @out = iterativeNorm(new StringCharacterIterator(field[4]), Normalizer.FCD, buf, -1, options);

            // NFD applied to the FCD form must give the NFD column.
            @out = Normalizer.Normalize(fcd, Normalizer.NFD);
            if (!@out.Equals(field[2]))
            {
                Errln("Normalizer error: NFD(FCD(s))!=NFD(s)");
                pass = false;
            }
            if (!pass)
            {
                Errln("FAIL: " + line);
            }
            if (field[0] != field[2])
            {
                // two strings that are canonically equivalent must test
                // equal under a canonical caseless match
                // see UAX #21 Case Mappings and Jitterbug 2021 and
                // Unicode Technical Committee meeting consensus 92-C31
                int rc;
                if ((rc = Normalizer.Compare(field[0], field[2], (options << Normalizer.COMPARE_NORM_OPTIONS_SHIFT) | Normalizer.COMPARE_IGNORE_CASE)) != 0)
                {
                    Errln("Normalizer.compare(original, NFD, case-insensitive) returned " + rc + " instead of 0 for equal");
                    pass = false;
                }
            }

            return pass;
        }