Exemplo n.º 1
0
        //
        // Looks up the item number of a category value combination, registering the
        // combination as a new item when it has not been seen before.
        //
        // Parameters:
        //   categoriesValueTable  The flat table that receives the data of every newly
        //                         registered item.
        //   allCategoryValues     The combination of Unicode category and BiDi category.
        //                         They should use the original form in UnicodeData.txt
        //                         (such as "Cn" for not assigned and "L" for left-to-right)
        //                         and are separated by a comma.
        //
        // Returns:
        //   The item number stored for the combination in CategoryValues.
        //
        // Throws:
        //   InvalidOperationException when a new combination would have to be registered
        //   while CategoryValues already holds 255 entries.
        //
        private static byte GetCategoryValueItem(FlatDataTable categoriesValueTable, string allCategoryValues)
        {
            // Known combination: hand back the item number assigned earlier.
            if (CategoryValues.TryGetValue(allCategoryValues, out var knownItem))
            {
                return knownItem;
            }

            // First occurrence of this combination: make sure there is still room for it.
            var registeredCount = CategoryValues.Count;
            if (registeredCount >= 255)
            {
                throw new InvalidOperationException("The possible number of values exceeds 255.");
            }

            // The next free item number equals the number of entries registered so far.
            var newItem = (byte)registeredCount;
            CategoryValues.Add(allCategoryValues, newItem);
            categoriesValueTable.AddData(newItem, allCategoryValues);

            return newItem;
        }
Exemplo n.º 2
0
        //
        // Entry point of the generator: fills the category and numeric tables from
        // "UnicodeData.txt" and writes them as C# source to the file named by SOURCE_NAME.
        //
        // Parameters:
        //   args  Command-line arguments; currently ignored (see the TODO below).
        //
        private static void Main(string[] args)
        {
            // TODO: parse args
            Verbose = false;

            // Default category combination; it is registered first below so that it becomes item 0.
            const string defaultCategoryValues = "Cn,L";

            // Index table for the Unicode category data (labelled "11:5:4" in the generated source).
            var categoriesIndexTable = new DataTable();
            // Index table for decimal digit value/digit value/numeric value (labelled "12:4:4").
            var numericIndexTable = new DataTable();

            // Flat value tables. The second constructor argument is the callback used to
            // generate the bytes of each item.
            var categoryData = new FlatDataTable(defaultCategoryValues, GetCategoriesValueBytes);
            var numericData  = new FlatDataTable("-1", GetNumericValueBytes);
            var digitData    = new FlatDataTable("255,255", GetDigitValueBytes);

            // Register the defaults as item 0 of the category, numeric and digit value tables.
            GetCategoryValueItem(categoryData, defaultCategoryValues);
            NumberValues.Add("-1,255,255", 0);
            numericData.AddData(0, "-1");
            digitData.AddData(0, "255,255");

            ReadSourceFile("UnicodeData.txt", categoriesIndexTable, categoryData, numericIndexTable, numericData, digitData);

            // nameof() passes the local variable names themselves as the first argument.
            categoriesIndexTable.GenerateTable(nameof(categoriesIndexTable), 5, 4);
            //categoriesIndexTable.CalculateTableVariants();
            numericIndexTable.GenerateTable(nameof(numericIndexTable), 4, 4, cutOff: true);
            //numericIndexTable.CalculateTableVariants(cutOff: true);

            // Generate the data C# source. Line ends are written as explicit "\n" characters.
            using (var output = File.CreateText(SOURCE_NAME))
            {
                // License header.
                output.Write(
                    "// Licensed to the .NET Foundation under one or more agreements.\n" +
                    "// The .NET Foundation licenses this file to you under the MIT license.\n" +
                    "// See the LICENSE file in the project root for more information.\n\n");

                // Opening of the namespace and of the partial class that holds the data.
                output.Write(
                    "namespace System.Globalization\n" +
                    "{\n" +
                    "    public static partial class CharUnicodeInfo\n    {\n");

                // Banner marking the data as generated.
                output.Write(
                    "        // THE FOLLOWING DATA IS AUTO GENERATED BY GenUnicodeProp program UNDER THE TOOLS FOLDER\n" +
                    "        // PLEASE DON'T MODIFY BY HAND\n\n\n");

                // Category data: index table followed by its value table.
                output.Write("        // 11:5:4 index table of the Unicode category data.");
                PrintSourceIndexArray("CategoryLevel1Index", categoriesIndexTable, output);
                PrintValueArray("CategoriesValue", categoryData, output);

                // Numeric data: index table followed by the numeric and digit value tables.
                output.Write("\n        // 12:4:4 index table of the Unicode numeric data.");
                PrintSourceIndexArray("NumericLevel1Index", numericIndexTable, output);

                output.Write("\n        // Every item contains the value for numeric value.");
                PrintValueArray("NumericValues", numericData, output);
                PrintValueArray("DigitValues", digitData, output);

                // Close the class and the namespace.
                output.Write("\n    }\n}");
            }
        }
Exemplo n.º 3
0
 //
 // Prints a separator line to the console, then hands the flattened bytes of a value
 // table to PrintByteArray() together with the table name and the output writer.
 //
 // Parameters:
 //   tableName  The name passed on to PrintByteArray().
 //   d          The flat table whose bytes are obtained through GetBytesFlat().
 //   file       The writer passed on to PrintByteArray().
 //
 private static void PrintValueArray(string tableName, FlatDataTable d, StreamWriter file)
 {
     Console.WriteLine("    ******************************** .");

     var flatBytes = d.GetBytesFlat();
     PrintByteArray(tableName, file, flatBytes);
 }
Exemplo n.º 4
0
        //
        //   Reads a file in the UnicodeData.txt format and records, for every code point it
        //   covers, a category item number and a numeric item number in the two index tables.
        //   Numeric/digit value combinations are added to the flat value tables the first time
        //   they are seen; category combinations are registered through GetCategoryValueItem().
        //   A "<..., First>" line is treated as the start of a range whose end is the next line.
        //
        //   Parameters:
        //       sourceFileName        Generally this refers to "UnicodeData.txt".
        //       categoriesIndexTable  Receives the category item number of every code point.
        //       categoriesValueTable  Passed to GetCategoryValueItem() for new category combinations.
        //       numericIndexTable     Receives the numeric item number of every code point.
        //       numericValueTable     Receives the numeric value of every new number item.
        //       digitValueTable       Receives the decimal digit/digit values of every new number item.
        //
        //   Throws:
        //       InvalidOperationException  when a new number value combination shows up while
        //                                  NumberValues already holds 255 entries.
        //       InvalidDataException       when the file ends right after a range start line.
        //
        //   The process exits with code 1 when the two lines of a range disagree on the category.
        //
        private static void ReadSourceFile(string sourceFileName, DataTable categoriesIndexTable, FlatDataTable categoriesValueTable, DataTable numericIndexTable, FlatDataTable numericValueTable, FlatDataTable digitValueTable)
        {
            // Lines consumed by the loop header; the end line of a range pair is read
            // separately inside the loop and is not included in this count.
            var lineCount      = 0;
            var codePointCount = 0;        // The count of the total characters in the file.

            Console.Write($"Read {sourceFileName}");

            // Field  Name in UnicodeData.txt
            // 0      Code value
            // 1      Character name
            // 2      General Category
            //
            // 3      Canonical Combining Classes
            // 4      Bidirectional Category
            // 5      Character Decomposition Mapping
            // 6      Decimal digit value
            // 7      Digit value
            // 8      Numeric value
            // 9      Mirrored
            // 10     Unicode 1.0 Name
            // 11     10646 comment field
            // 12     Uppercase Mapping
            // 13     Lowercase Mapping
            // 14     Titlecase Mapping

            using (var sourceFile = File.OpenText(sourceFileName))
            {
                while (sourceFile.ReadLine() is string line)
                {
                    var fields   = line.Split(';');
                    // The code value is machine-readable hex, so parse it culture-independently.
                    var code     = uint.Parse(fields[0], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    var comments = fields[1];
                    var category = fields[2];

                    var bidiCategory      = fields[4];
                    var decimalDigitValue = fields[6];
                    var digitValue        = fields[7];
                    var numericValue      = fields[8];

                    // Empty digit fields are stored as "255" and an empty numeric field as "-1".
                    var allCategoryValues = category + "," + bidiCategory;
                    var allDigitValue     = (decimalDigitValue == "" ? "255" : decimalDigitValue) + "," + (digitValue == "" ? "255" : digitValue);
                    var allNumValues      = numericValue == "" ? "-1" : numericValue;
                    var allValues         = allNumValues + "," + allDigitValue;

                    if (Verbose)
                    {
                        Console.WriteLine($"[{code:X4}]- Cat: [{category}], BiDi Category: [{bidiCategory}], Numeric: [{numericValue}], Comments: [{comments}]");
                    }

                    if (!NumberValues.TryGetValue(allValues, out var numItem))
                    {
                        if (NumberValues.Count >= 255)
                        {
                            throw new InvalidOperationException("The possible number of values exceeds 255.");
                        }
                        // The next free item number equals the current element count of the lookup.
                        numItem = (byte)NumberValues.Count;
                        NumberValues[allValues] = numItem;
                        numericValueTable.AddData(numItem, allNumValues);
                        digitValueTable.AddData(numItem, allDigitValue);
                    }

                    var categoryItem = GetCategoryValueItem(categoriesValueTable, allCategoryValues);

                    // The length check keeps an empty name field from failing on comments[0].
                    var isRangeStart = comments.Length > 0
                        && comments[0] == '<'
                        && comments.EndsWith(", First>", StringComparison.Ordinal);

                    if (isRangeStart)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine($"Range start: {code:X4} [{category}] [{comments}]");
                        }

                        // Read the next line to get the end of the range. ReadLine() returns null
                        // at the end of the file, which means the range was never closed.
                        var endLine = sourceFile.ReadLine();
                        if (endLine == null)
                        {
                            throw new InvalidDataException($"{sourceFileName} ends after the range start [{code:X4}] [{comments}]; the line with the end of the range is missing.");
                        }

                        var endFields        = endLine.Split(';');
                        var codeEndRange     = uint.Parse(endFields[0], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                        var valueEndRange    = endFields[2];
                        var commentsEndRange = endFields[1];

                        if (Verbose)
                        {
                            Console.WriteLine($"Range   end: {codeEndRange:X4} [{valueEndRange}] [{commentsEndRange}]");
                        }

                        if (category != valueEndRange)
                        {
                            Console.WriteLine("Different categories in the beginning of the range and the end of the range");
                            Environment.Exit(1);
                        }

                        // Add data for a range of code points; both ends are inclusive.
                        for (var i = code; i <= codeEndRange; i++)
                        {
                            categoriesIndexTable.AddData(i, categoryItem);
                            numericIndexTable.AddData(i, numItem);
                            codePointCount++;
                            if (Verbose)
                            {
                                Console.WriteLine($"Read: {i:X8} [{allCategoryValues}]");
                            }
                        }
                    }
                    else
                    {
                        // Add data for a single code point.
                        categoriesIndexTable.AddData(code, categoryItem);
                        numericIndexTable.AddData(code, numItem);
                        codePointCount++;
                        if (Verbose)
                        {
                            Console.WriteLine($"Read: {code:X8} [{allCategoryValues}]");
                        }
                    }

                    // Progress indicator: one dot per 256 counted lines.
                    lineCount++;
                    if (lineCount % 256 == 0)
                    {
                        Console.Write('.');
                    }
                }
            }

            Console.WriteLine();
            Console.WriteLine();
            Console.WriteLine($"    Total lines in the file: {lineCount}");
            Console.WriteLine($"    Total characters: {codePointCount}");

            var allValueCount = CategoryValues.Count;

            Console.WriteLine($"    Total possible categories values: {allValueCount + 1}.  Maximum allowed: 256");

            allValueCount = NumberValues.Count;
            Console.WriteLine($"    Total possible number values: {allValueCount + 1}.  Maximum allowed: 256");
            Console.WriteLine($"    Finish reading {sourceFileName}.");
        }