Ejemplo n.º 1
0
        // Compares CharUnicodeInfo.GetUnicodeCategory(int) with the data returned by
        // UnicodeData.GetData for every value from 0 through HIGHEST_CODE_POINT.
        public void GetUnicodeCategory_Int32()
        {
            int codePoint = 0;
            while (codePoint <= HIGHEST_CODE_POINT)
            {
                // Fetch the expected record first, then ask the API under test.
                CodePoint reference = UnicodeData.GetData(codePoint);
                UnicodeCategory observed = CharUnicodeInfo.GetUnicodeCategory(codePoint);

                AssertEqual(
                    reference.GeneralCategory,
                    observed,
                    nameof(CharUnicodeInfo.GetUnicodeCategory),
                    reference);

                codePoint++;
            }
        }
Ejemplo n.º 2
0
        // Compares CharUnicodeInfo.GetUnicodeCategory(char) with the data returned by
        // UnicodeData.GetData for every char value from 0 through char.MaxValue.
        public void GetUnicodeCategory_Char()
        {
            // The counter is an int so that the loop can terminate after char.MaxValue.
            int codeUnit = 0;
            while (codeUnit <= char.MaxValue)
            {
                char current = (char)codeUnit;

                CodePoint reference = UnicodeData.GetData(current);
                UnicodeCategory observed = CharUnicodeInfo.GetUnicodeCategory(current);

                AssertEqual(
                    reference.GeneralCategory,
                    observed,
                    nameof(CharUnicodeInfo.GetUnicodeCategory),
                    reference);

                codeUnit++;
            }
        }
Ejemplo n.º 3
0
        // Compares CharUnicodeInfo.GetNumericValue(char) with the NumericValue of the
        // record returned by UnicodeData.GetData for every char value from 0 through
        // char.MaxValue.
        public void GetNumericValue_Char()
        {
            // The counter is an int so that the loop can terminate after char.MaxValue.
            int codeUnit = 0;
            while (codeUnit <= char.MaxValue)
            {
                char current = (char)codeUnit;

                CodePoint reference = UnicodeData.GetData(current);
                double observed = CharUnicodeInfo.GetNumericValue(current);

                AssertEqual(
                    reference.NumericValue,
                    observed,
                    nameof(CharUnicodeInfo.GetNumericValue),
                    reference);

                codeUnit++;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Entry point of the generator. Loads the Unicode data through
        /// <c>UnicodeData.GetData</c>, assigns a byte index to every distinct
        /// <c>CategoryCasingInfo</c> and <c>NumericGraphemeInfo</c> value seen across
        /// code points 0 through 0x10FFFF, feeds those indices into two
        /// <c>DataTable</c> instances, and finally writes the generated tables as a
        /// C# source file to <c>SOURCE_NAME</c>.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. "-Verbose" and "-IncludeCasingData" are recognized;
        /// both are matched with an ordinal, case-insensitive comparison.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when either map ends up with more than 256 distinct entries.
        /// </exception>
        private static void Main(string[] args)
        {
            // Record which optional switches were passed on the command line.
            Verbose           = args.Contains("-Verbose", StringComparer.OrdinalIgnoreCase);
            IncludeCasingData = args.Contains("-IncludeCasingData", StringComparer.OrdinalIgnoreCase);

            // First, read the data files and build up a list of all
            // assigned code points.

            Console.WriteLine("Reading Unicode data files...");

            _ = UnicodeData.GetData(0); // processes files

            Console.WriteLine("Finished.");
            Console.WriteLine();

            Console.WriteLine("Initializing maps...");
            // Each map assigns a byte index to every distinct info value, in order of first appearance.
            Dictionary <CategoryCasingInfo, byte>  categoryCasingMap  = new Dictionary <CategoryCasingInfo, byte>();
            Dictionary <NumericGraphemeInfo, byte> numericGraphemeMap = new Dictionary <NumericGraphemeInfo, byte>();

            // Next, iterate through all code points, populating
            // the category casing & numeric grapheme maps. Also put the
            // data into the DataTable structure, which will compute
            // the tiered offset tables.

            DataTable categoryCasingTable  = new DataTable();
            DataTable numericGraphemeTable = new DataTable();

            for (int i = 0; i <= 0x10_FFFF; i++)
            {
                CodePoint thisCodePoint = UnicodeData.GetData(i);

                CategoryCasingInfo categoryCasingInfo = new CategoryCasingInfo(thisCodePoint);
                if (!categoryCasingMap.TryGetValue(categoryCasingInfo, out byte cciValue))
                {
                    // First time this value is seen: its index is the current map size.
                    // NOTE: outside a checked context this cast wraps silently once the
                    // map holds more than 256 entries; the count check after the loop
                    // detects that case.
                    cciValue = (byte)categoryCasingMap.Count;
                    categoryCasingMap[categoryCasingInfo] = cciValue;
                }
                categoryCasingTable.AddData((uint)i, cciValue);

                NumericGraphemeInfo numericGraphemeInfo = new NumericGraphemeInfo(thisCodePoint);
                if (!numericGraphemeMap.TryGetValue(numericGraphemeInfo, out byte ngiValue))
                {
                    // Same indexing scheme (and same wrap caveat) as the category casing map above.
                    ngiValue = (byte)numericGraphemeMap.Count;
                    numericGraphemeMap[numericGraphemeInfo] = ngiValue;
                }
                numericGraphemeTable.AddData((uint)i, ngiValue);
            }

            // Did anything overflow?
            // The indices are stored as bytes, so each map may hold at most 256 entries.

            Console.WriteLine($"CategoryCasingMap contains {categoryCasingMap.Count} entries.");
            if (categoryCasingMap.Count > 256)
            {
                throw new Exception("CategoryCasingMap exceeds max count of 256 entries!");
            }

            Console.WriteLine($"NumericGraphemeMap contains {numericGraphemeMap.Count} entries.");
            if (numericGraphemeMap.Count > 256)
            {
                throw new Exception("NumericGraphemeMap exceeds max count of 256 entries!");
            }

            Console.WriteLine();

            // Choose default ratios for the data tables we'll be generating.
            // NOTE(review): the two constructor arguments are presumably the level-2 and
            // level-3 bit counts read back below - confirm against TableLevels.

            TableLevels categoryCasingTableLevelBits  = new TableLevels(5, 4);
            TableLevels numericGraphemeTableLevelBits = new TableLevels(5, 4);

            // Now generate the tables.

            categoryCasingTable.GenerateTable("CategoryCasingTable", categoryCasingTableLevelBits.Level2Bits, categoryCasingTableLevelBits.Level3Bits);
            numericGraphemeTable.GenerateTable("NumericGraphemeTable", numericGraphemeTableLevelBits.Level2Bits, numericGraphemeTableLevelBits.Level3Bits);

            // If you want to see if a different ratio would have better compression
            // statistics, uncomment the lines below and re-run the application.
            // categoryCasingTable.CalculateTableVariants();
            // numericGraphemeTable.CalculateTableVariants();

            // Now generate the C# source file.
            // All text is written with explicit "\n" line endings; they are converted
            // after the file is closed if the platform newline differs.

            using (StreamWriter file = File.CreateText(SOURCE_NAME))
            {
                // License header of the generated file.
                file.Write("// Licensed to the .NET Foundation under one or more agreements.\n");
                file.Write("// The .NET Foundation licenses this file to you under the MIT license.\n");

                file.Write("using System.Diagnostics;\n\n");

                file.Write("namespace System.Globalization\n");
                file.Write("{\n");
                file.Write("    public static partial class CharUnicodeInfo\n    {\n");

                file.Write("        // THE FOLLOWING DATA IS AUTO GENERATED BY GenUnicodeProp program UNDER THE TOOLS FOLDER\n");
                file.Write("        // PLEASE DON'T MODIFY BY HAND\n");
                file.Write("        // IF YOU NEED TO UPDATE UNICODE VERSION FOLLOW THE GUIDE AT src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md\n");

                // Category & casing section: assertion routine, index table, then value arrays.
                PrintAssertTableLevelsBitCountRoutine("CategoryCasing", file, categoryCasingTableLevelBits);

                file.Write($"\n        // {categoryCasingTableLevelBits} index table of the Unicode category & casing data.");
                PrintSourceIndexArray("CategoryCasingLevel1Index", categoryCasingTable, file);

                file.Write("\n        // Contains Unicode category & bidi class information");
                PrintValueArray("CategoriesValues", categoryCasingMap, CategoryCasingInfo.ToCategoryBytes, file);

                if (IncludeCasingData)
                {
                    // Only write out the casing data if we have been asked to do so.

                    file.Write("\n        // Contains simple culture-invariant uppercase mappings");
                    PrintValueArray("UppercaseValues", categoryCasingMap, CategoryCasingInfo.ToUpperBytes, file);

                    file.Write("\n        // Contains simple culture-invariant lowercase mappings");
                    PrintValueArray("LowercaseValues", categoryCasingMap, CategoryCasingInfo.ToLowerBytes, file);

                    file.Write("\n        // Contains simple culture-invariant titlecase mappings");
                    PrintValueArray("TitlecaseValues", categoryCasingMap, CategoryCasingInfo.ToTitleBytes, file);

                    file.Write("\n        // Contains simple culture-invariant case fold mappings");
                    PrintValueArray("CaseFoldValues", categoryCasingMap, CategoryCasingInfo.ToCaseFoldBytes, file);
                }

                // Numeric & grapheme section: assertion routine, index table, then value arrays.
                PrintAssertTableLevelsBitCountRoutine("NumericGrapheme", file, numericGraphemeTableLevelBits);

                file.Write($"\n        // {numericGraphemeTableLevelBits} index table of the Unicode numeric & text segmentation data.");
                PrintSourceIndexArray("NumericGraphemeLevel1Index", numericGraphemeTable, file);

                file.Write("\n        // Contains decimal digit values in high nibble; digit values in low nibble");
                PrintValueArray("DigitValues", numericGraphemeMap, NumericGraphemeInfo.ToDigitBytes, file);

                file.Write("\n        // Contains numeric values");
                PrintValueArray("NumericValues", numericGraphemeMap, NumericGraphemeInfo.ToNumericBytes, file);

                file.Write("\n        // Contains grapheme cluster segmentation values");
                PrintValueArray("GraphemeSegmentationValues", numericGraphemeMap, NumericGraphemeInfo.ToGraphemeBytes, file);

                // Close the class and namespace opened above.
                file.Write("\n    }\n}\n");
            }

            // Quick fixup: Replace \n with \r\n on Windows.
            // (More precisely: whenever Environment.NewLine is not "\n", the file is
            // re-read and rewritten with the platform newline.)

            if (Environment.NewLine != "\n")
            {
                File.WriteAllText(SOURCE_NAME, File.ReadAllText(SOURCE_NAME).Replace("\n", Environment.NewLine));
            }

            Console.WriteLine("Completed!");
        }