/// <summary>
/// Parses <paramref name="source"/> on a thread-pool thread and builds one <see cref="UnicodeData"/>
/// per descendant element whose <c>class</c> attribute starts with "u" (case-insensitive).
/// </summary>
/// <param name="source">XML markup to parse; it is handed to <c>XElement.Parse</c> unchanged.</param>
/// <returns>
/// An array with one slot per descendant element of the parsed root. Slots belonging to elements
/// that are not character entries (or that lack a <c>span</c>/<c>tt</c> child) stay <c>null</c>,
/// so callers must null-check every item.
/// </returns>
private async static Task <UnicodeData[]> ReadXml(string source)
        {
            return await Task.Run(() =>
            {
                // Descendants() is lazy; materialize it once instead of walking the tree
                // a second time just to size the array.
                List <XElement> elements = XElement.Parse(source).Descendants().ToList();
                UnicodeData[] list = new UnicodeData[elements.Count];

                for (int j = 0; j < elements.Count; j++)
                {
                    XElement element = elements[j];

                    //< div class="u"><span>䷀</span><tt>4dc0</tt></div>
                    XAttribute classAttribute = element.Attribute("class");
                    if (classAttribute == null ||
                        !classAttribute.Value.StartsWith("u", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    // A matching element without both children cannot describe a character;
                    // leave its slot null rather than failing the whole parse.
                    XElement span = element.Element("span");
                    XElement tt   = element.Element("tt");
                    if (span == null || tt == null)
                    {
                        continue;
                    }

                    string glyph = span.Value;
                    string code  = tt.Value;

                    list[j] = new UnicodeData()
                    {
                        Name     = glyph,
                        IsLeaf   = true,
                        Code     = code,
                        Title    = glyph,
                    };
                    // The <tt> text is the hexadecimal code point.
                    list[j].DataCode = Convert.ToInt32("0x" + code, 16);
                }

                return(list);
            });
        }
Example #2
0
        /// <summary>
        /// Populates <c>UnicodeData</c> from the packaged "ucd.all.optimized.xml" file and
        /// <c>Fonts</c> with the loaded fonts sorted by name.
        /// </summary>
        static async Task _InitAsync()
        {
            // Opening and parsing the data file happens inside Task.Run.
            UnicodeData = await Task.Run(async() =>
            {
                var dataUri    = new Uri("ms-appx:///Data/ucd.all.optimized.xml");
                var dataFile   = await StorageFile.GetFileFromApplicationUriAsync(dataUri);
                var readHandle = await dataFile.OpenReadAsync();

                using (var input = readHandle.AsStreamForRead())
                {
                    var loaded = UnicodeData.Load(input);
                    return(loaded);
                }
            });

            var loadedFonts = await FontData.LoadAsync();
            Fonts = loadedFonts.OrderBy(font => font.Name).ToList();

#if DEBUG
            // Disabled helper that would write the loaded data back out to the app's local folder.
            //var outputfolder = ApplicationData.Current.LocalFolder;
            //var outputfile = await outputfolder.CreateFileAsync("ucd.all.optimized.xml", CreationCollisionOption.ReplaceExisting);
            //var rasoutput = await outputfile.OpenAsync(FileAccessMode.ReadWrite);
            //await Task.Run(() =>
            //{
            //    using (var stream = rasoutput.AsStreamForWrite())
            //    {
            //        UnicodeData.Save(stream, UnicodeData);
            //    }
            //});
#endif
        }
Example #3
0
 /// <summary>
 /// Writes a window of the entries of <paramref name="ud"/> to the console:
 /// the first 30 are skipped and at most the next 50 are printed.
 /// </summary>
 private static void ReadUnicodeData(UnicodeData ud)
 {
     var window = ud.GetEntries().Skip(30).Take(50);

     foreach (var entry in window)
     {
         Console.WriteLine(entry);
     }
 }
        /// <summary>
        /// Click handler for the generated character buttons: appends the character described by
        /// the button's <c>Tag</c> to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">The clicked button; its <c>Tag</c> is expected to hold a <c>UnicodeData</c>.</param>
        /// <param name="e">Unused.</param>
        private void btnUnicode_Click(object sender, EventArgs e)
        {
            Button      butt = sender as Button;
            UnicodeData d    = butt == null ? null : butt.Tag as UnicodeData;

            // Nothing to insert when the event did not come from a tagged character button.
            if (d == null)
            {
                return;
            }

            int c = Convert.ToInt32("0x" + d.Code, 16);

            // A single UTF-16 char cannot hold code points above U+FFFF; a plain (char) cast
            // would silently truncate them, so build the surrogate pair for that range.
            String s = (c >= 0x10000 && c <= 0x10FFFF)
                ? char.ConvertFromUtf32(c)
                : ((char)c).ToString();

            textBox1.AppendText(s);
        }
Example #5
0
        public static void IsWhiteSpace_AllInputs()
        {
            // Exhaustive check: for every Rune yielded by AllRunes(), Rune.IsWhiteSpace must
            // agree with the answer derived from the core Unicode data files.

            foreach (Rune candidate in AllRunes())
            {
                var  expected = UnicodeData.IsWhiteSpace(candidate.Value);
                bool actual   = Rune.IsWhiteSpace(candidate);

                Assert.Equal(expected, actual);
            }
        }
Example #6
0
        public void GetUnicodeCategory_Int32()
        {
            // Compare CharUnicodeInfo.GetUnicodeCategory(int) with the reference data for every
            // code point from 0 through HIGHEST_CODE_POINT (inclusive).
            int codePoint = 0;

            while (codePoint <= HIGHEST_CODE_POINT)
            {
                CodePoint       reference = UnicodeData.GetData(codePoint);
                UnicodeCategory observed  = CharUnicodeInfo.GetUnicodeCategory(codePoint);

                AssertEqual(reference.GeneralCategory, observed, nameof(CharUnicodeInfo.GetUnicodeCategory), reference);

                codePoint++;
            }
        }
Example #7
0
        /// <summary>
        /// Prints the decomposition mapping of every entry of <paramref name="ud"/> that has a
        /// non-empty one.
        /// </summary>
        private static void DecompositionMap(UnicodeData ud)
        {
            foreach (var entry in ud.GetEntries())
            {
                var mapping = entry.DecompositionMapping;

                // Entries without a mapping produce no output.
                if (mapping.Length != 0)
                {
                    Console.WriteLine(mapping);
                }
            }
        }
Example #8
0
        public void GetNumericValue_Char()
        {
            // Check CharUnicodeInfo.GetNumericValue(char) against the reference data for every
            // possible UTF-16 code unit.
            for (int code = char.MinValue; code <= char.MaxValue; code++)
            {
                char current = (char)code;

                CodePoint reference = UnicodeData.GetData(current);
                double    observed  = CharUnicodeInfo.GetNumericValue(current);

                AssertEqual(reference.NumericValue, observed, nameof(CharUnicodeInfo.GetNumericValue), reference);
            }
        }
Example #9
0
        public void GetUnicodeCategory_Char()
        {
            // Check CharUnicodeInfo.GetUnicodeCategory(char) against the reference data for every
            // possible UTF-16 code unit.
            for (int code = char.MinValue; code <= char.MaxValue; code++)
            {
                char current = (char)code;

                CodePoint       reference = UnicodeData.GetData(current);
                UnicodeCategory observed  = CharUnicodeInfo.GetUnicodeCategory(current);

                AssertEqual(reference.GeneralCategory, observed, nameof(CharUnicodeInfo.GetUnicodeCategory), reference);
            }
        }
Example #10
0
        public static void GetUnicodeCategory_Char_AllInputs()
        {
            // Calls char.GetUnicodeCategory for every possible input and verifies that the
            // runtime agrees with the data in the core Unicode files.

            for (uint i = 0; i <= char.MaxValue; i++)
            {
                // A handful of code points changed category between versions of the Unicode
                // specification. For compatibility, Char keeps its own copy of the categories
                // for the first 256 code points, locked to an earlier version of the standard,
                // so those code points get hard-coded expectations here. For an example of a
                // code point that switched categories, see the discussion on U+00AD SOFT HYPHEN
                // at https://www.unicode.org/versions/Unicode4.0.0/.

                UnicodeCategory expected;

                if (i == '\u00a7' || i == '\u00b6')
                {
                    expected = UnicodeCategory.OtherSymbol;
                }
                else if (i == '\u00aa' || i == '\u00ba')
                {
                    expected = UnicodeCategory.LowercaseLetter;
                }
                else if (i == '\u00ad')
                {
                    expected = UnicodeCategory.DashPunctuation;
                }
                else
                {
                    expected = UnicodeData.GetUnicodeCategory(i);
                }

                UnicodeCategory actual = char.GetUnicodeCategory((char)i);

                if (expected == actual)
                {
                    continue;
                }

                // Build the exception message by hand so the failing code point is visible.
                throw new AssertActualExpectedException(
                          expected: expected,
                          actual: actual,
                          userMessage: FormattableString.Invariant($@"char.GetUnicodeCategory('\u{i:X4}') returned wrong value."));
            }
        }
 public void CodePointEncodingTest()
 {
     // Round-trips every value from 0 through 0x10FFFF: write it with WriteCodePoint, rewind
     // the shared stream, and expect UnicodeData.ReadCodePoint to return the same value.
     using (var buffer = new MemoryStream(4))
     {
         using (var encoder = new BinaryWriter(buffer, Encoding.UTF8, true))
         {
             using (var decoder = new BinaryReader(buffer, Encoding.UTF8, true))
             {
                 int codePoint = 0;

                 while (codePoint <= 0x10FFFF)
                 {
                     encoder.WriteCodePoint(codePoint);
                     encoder.Flush();

                     // Rewind so the reader starts at the bytes just written.
                     buffer.Position = 0;
                     Assert.Equal(codePoint, UnicodeData.ReadCodePoint(decoder));

                     // Rewind again so the next write overwrites from the start.
                     buffer.Position = 0;

                     ++codePoint;
                 }
             }
         }
     }
 }
Example #12
0
        public static void GetUnicodeCategory_AllInputs()
        {
            // Calls Rune.GetUnicodeCategory for every possible input and verifies that the
            // runtime agrees with the data in the core Unicode files.

            foreach (Rune rune in AllRunes())
            {
                var expected = UnicodeData.GetUnicodeCategory(rune.Value);
                var actual   = Rune.GetUnicodeCategory(rune);

                if (expected == actual)
                {
                    continue;
                }

                // Build the exception message by hand so the failing code point is visible.
                throw new AssertActualExpectedException(
                          expected: expected,
                          actual: actual,
                          userMessage: FormattableString.Invariant($@"Rune.GetUnicodeCategory(U+{rune.Value:X4}) returned wrong value."));
            }
        }
Example #13
0
        public static void IsLetter_Char_AllInputs()
        {
            // Calls char.IsLetter for every possible input and verifies that the runtime
            // agrees with the data in the core Unicode files.

            for (uint i = 0; i <= char.MaxValue; i++)
            {
                char current  = (char)i;
                var  expected = UnicodeData.IsLetter(current);
                bool actual   = char.IsLetter(current);

                if (expected == actual)
                {
                    continue;
                }

                // Build the exception message by hand so the failing code point is visible.
                throw new AssertActualExpectedException(
                          expected: expected,
                          actual: actual,
                          userMessage: FormattableString.Invariant($@"char.IsLetter('\u{i:X4}') returned wrong value."));
            }
        }
        /// <summary>
        /// Builds an action that replaces the contents of <c>panel1</c> with one button per
        /// non-null entry of <paramref name="array1D"/>, laid out left-to-right in rows of 17.
        /// </summary>
        /// <param name="array1D">Entries to show; <c>null</c> slots are skipped.</param>
        /// <returns>The action; nothing is changed until it is invoked.</returns>
        Action CreatButtons(UnicodeData[] array1D)
        {
            const int columnsPerRow = 17;
            const int cellPitch     = 40;   // distance between neighbouring button origins
            const int margin        = 3;

            var act = new Action(() =>
            {
                int tabIndex = 0;

                this.panel1.SuspendLayout();
                try
                {
                    // Controls.Clear() only detaches the children; the previous buttons must be
                    // disposed explicitly or their window handles leak on every refresh.
                    // Dispose() removes a control from its parent, hence the backwards walk.
                    for (int n = this.panel1.Controls.Count - 1; n >= 0; n--)
                    {
                        this.panel1.Controls[n].Dispose();
                    }
                    this.panel1.Controls.Clear();

                    int column = 0;
                    int row    = 0;

                    for (int l = 0; l < array1D.Length; l++)
                    {
                        UnicodeData d = array1D[l];

                        // The array may contain empty slots; there is nothing to draw for them.
                        if (d == null)
                        {
                            continue;
                        }

                        Button butt = new Button();
                        //
                        // button
                        //
                        butt.Location = new System.Drawing.Point(margin + column * cellPitch, margin + row * cellPitch);
                        // Unique per button; previously every button reused the name "btnClear".
                        butt.Name     = "btnUnicode" + l;
                        butt.Size     = new System.Drawing.Size(32, 27);
                        butt.TabIndex = l;
                        butt.Text     = d.Name;
                        butt.Tag      = d;
                        butt.UseVisualStyleBackColor = true;
                        butt.Click += new System.EventHandler(this.btnUnicode_Click);

                        if (d.IsLeaf)
                        {
                            this.toolTip1.SetToolTip(butt, d.Title);
                        }
                        else
                        {
                            butt.Enabled = false;
                        }

                        this.panel1.Controls.Add(butt);

                        // Advance to the next grid cell, wrapping at the end of a row.
                        column++;
                        if (column == columnsPerRow)
                        {
                            column = 0;
                            row++;
                        }

                        tabIndex = butt.TabIndex;
                    }
                }
                finally
                {
                    this.panel1.ResumeLayout();
                }

                // Hand the last tab index used to SetTabIndex together with the fixed controls.
                SetTabIndex(tabIndex, new Control[] { btnClear, btnDel, textBox1 });
            });

            return(act);
        }
        /// <summary>
        /// Replaces the text of <c>textBox1</c> with the characters of all enabled buttons in
        /// <c>panel1</c>, one per line.
        /// </summary>
        /// <param name="sender">Unused.</param>
        /// <param name="e">Unused.</param>
        private void btnPut_Click(object sender, EventArgs e)
        {
            // Fall back to a default range when nothing is selected.
            if (cboUnicodeRange.SelectedValue == null)
            {
                cboUnicodeRange.SelectedValue = "cjk-unified-ideographs";
            }

            StringBuilder sbText = new StringBuilder();

            foreach (var item in this.panel1.Controls)
            {
                Button butt = item as Button;
                if (butt == null || !butt.Enabled)
                {
                    continue;
                }

                // Skip buttons that do not carry character data instead of dereferencing null.
                UnicodeData d = butt.Tag as UnicodeData;
                if (d == null)
                {
                    continue;
                }

                int c = d.DataCode;

                // A single UTF-16 char cannot hold code points above U+FFFF; a plain (char) cast
                // would silently truncate them, so build the surrogate pair for that range.
                string s = (c >= 0x10000 && c <= 0x10FFFF)
                    ? char.ConvertFromUtf32(c)
                    : ((char)c).ToString();

                sbText.AppendLine(s);
            }

            textBox1.Text = sbText.ToString();
        }
Example #16
0
        public static void IsLower_Char_AllInputs()
        {
            // Calls char.IsLower for every possible input and verifies that the runtime
            // agrees with the data in the core Unicode files.

            for (uint i = 0; i <= char.MaxValue; i++)
            {
                // U+00AA FEMININE ORDINAL INDICATOR and U+00BA MASCULINE ORDINAL INDICATOR:
                // in Unicode 6.1 these were reassigned from category Ll to category Lo.
                // For compatibility reasons Char uses the older version of the Unicode standard
                // for code points in the range U+0000..U+00FF, so both are still expected to be
                // reported as lowercase. More info: https://www.unicode.org/review/pri181/
                bool isOrdinalIndicator = i == '\u00AA' || i == '\u00BA';

                bool expected = isOrdinalIndicator
                                || UnicodeData.GetUnicodeCategory((char)i) == UnicodeCategory.LowercaseLetter;

                bool actual = char.IsLower((char)i);

                if (expected == actual)
                {
                    continue;
                }

                // Build the exception message by hand so the failing code point is visible.
                throw new AssertActualExpectedException(
                          expected: expected,
                          actual: actual,
                          userMessage: FormattableString.Invariant($@"char.IsLower('\u{i:X4}') returned wrong value."));
            }
        }
Example #17
0
        /// <summary>
        /// Sends the characters of the leading entries of <paramref name="array1D"/> to
        /// <c>AcF1.GetSingle</c> in newline-separated batches and concatenates the responses.
        /// </summary>
        /// <param name="array1D">Entries to translate; <c>null</c> slots are skipped.</param>
        /// <returns>The responses of all batches appended in order.</returns>
        private async Task <string> GETTranslateAPI(UnicodeData[] array1D)
        {
            // Only the first few entries are processed (the original hard-coded this to 3).
            // Clamp to the array length so arrays shorter than the cap no longer throw
            // IndexOutOfRangeException.
            const int maxEntries = 3;
            const int batchSize  = 500;

            int iLen = Math.Min(maxEntries, array1D.Length);

            StringBuilder sb  = new StringBuilder();
            StringBuilder sb1 = new StringBuilder();

            for (int i = 0; i < iLen; i++)
            {
                UnicodeData d = array1D[i];
                if (d != null)
                {
                    int c = d.DataCode;

                    // A single UTF-16 char cannot hold code points above U+FFFF; a plain (char)
                    // cast would silently truncate them, so build the surrogate pair instead.
                    string s = (c >= 0x10000 && c <= 0x10FFFF)
                        ? char.ConvertFromUtf32(c)
                        : ((char)c).ToString();

                    sb.AppendLine(s);
                }

                // NOTE(review): i % batchSize == 0 also fires at i == 0, so the first batch holds
                // a single entry — kept as-is to preserve the request pattern; confirm intent.
                bool flushPoint = i % batchSize == 0 || i == iLen - 1;
                if (flushPoint && sb.Length > 0)
                {
                    sb1.Append(await AcF1.GetSingle(sb.ToString()));
                    sb.Clear();
                }
            }

            return(sb1.ToString());
        }
        /// <summary>
        /// Selection handler for <c>cboUnicodeRange</c>: fetches the markup for the selected
        /// range, parses it, and shows the resulting characters in <c>textBox1</c>, one per line.
        /// </summary>
        /// <param name="sender">Unused.</param>
        /// <param name="e">Unused.</param>
        private async void cboUnicodeRange_SelectedIndexChanged(object sender, EventArgs e)
        {
            UniCodeRange range = cboUnicodeRange.SelectedItem as UniCodeRange;

            // The event also fires when the selection is cleared; there is nothing to load then.
            if (range == null)
            {
                return;
            }

            string source = await Code(range.Href);

            UnicodeData[] us = await ReadXml(source);

            StringBuilder sbText = new StringBuilder();

            foreach (UnicodeData item in us)
            {
                // The parsed array contains empty slots for elements that are not characters.
                if (item == null)
                {
                    continue;
                }

                int c = item.DataCode;

                // A single UTF-16 char cannot hold code points above U+FFFF; a plain (char) cast
                // would silently truncate them, so build the surrogate pair for that range.
                string s = (c >= 0x10000 && c <= 0x10FFFF)
                    ? char.ConvertFromUtf32(c)
                    : ((char)c).ToString();

                sbText.AppendLine(s);
            }

            textBox1.Text = sbText.ToString();
        }
Example #19
0
        /// <summary>
        /// Entry point of the generator: loads the Unicode data, assigns a one-byte index to each
        /// distinct category/casing and numeric/grapheme combination across code points
        /// 0..0x10FFFF, builds the tiered lookup tables, and writes the resulting C# source to
        /// <c>SOURCE_NAME</c>.
        /// </summary>
        /// <param name="args">
        /// Command-line switches, matched case-insensitively: <c>-Verbose</c> sets <c>Verbose</c>
        /// and <c>-IncludeCasingData</c> sets <c>IncludeCasingData</c>, which controls whether the
        /// casing value arrays are emitted.
        /// </param>
        private static void Main(string[] args)
        {
            Verbose           = args.Contains("-Verbose", StringComparer.OrdinalIgnoreCase);
            IncludeCasingData = args.Contains("-IncludeCasingData", StringComparer.OrdinalIgnoreCase);

            // First, read the data files and build up a list of all
            // assigned code points.

            Console.WriteLine("Reading Unicode data files...");

            _ = UnicodeData.GetData(0); // processes files

            Console.WriteLine("Finished.");
            Console.WriteLine();

            Console.WriteLine("Initializing maps...");
            // Each map assigns a byte-sized index to every distinct info value seen so far.
            Dictionary <CategoryCasingInfo, byte>  categoryCasingMap  = new Dictionary <CategoryCasingInfo, byte>();
            Dictionary <NumericGraphemeInfo, byte> numericGraphemeMap = new Dictionary <NumericGraphemeInfo, byte>();

            // Next, iterate through all code points from 0 to 0x10FFFF, populating
            // the category casing & numeric grapheme maps. Also put the
            // data into the DataTable structure, which will compute
            // the tiered offset tables.

            DataTable categoryCasingTable  = new DataTable();
            DataTable numericGraphemeTable = new DataTable();

            for (int i = 0; i <= 0x10_FFFF; i++)
            {
                CodePoint thisCodePoint = UnicodeData.GetData(i);

                CategoryCasingInfo categoryCasingInfo = new CategoryCasingInfo(thisCodePoint);
                if (!categoryCasingMap.TryGetValue(categoryCasingInfo, out byte cciValue))
                {
                    // First occurrence: the new index is the current map size.
                    // The byte cast is not range-checked here; the count check after the
                    // loop is what reports too many distinct entries.
                    cciValue = (byte)categoryCasingMap.Count;
                    categoryCasingMap[categoryCasingInfo] = cciValue;
                }
                categoryCasingTable.AddData((uint)i, cciValue);

                NumericGraphemeInfo numericGraphemeInfo = new NumericGraphemeInfo(thisCodePoint);
                if (!numericGraphemeMap.TryGetValue(numericGraphemeInfo, out byte ngiValue))
                {
                    // Same indexing scheme as for the category casing map above.
                    ngiValue = (byte)numericGraphemeMap.Count;
                    numericGraphemeMap[numericGraphemeInfo] = ngiValue;
                }
                numericGraphemeTable.AddData((uint)i, ngiValue);
            }

            // Did anything overflow?
            // Indices are stored as bytes, so each map can hold at most 256 distinct entries.

            Console.WriteLine($"CategoryCasingMap contains {categoryCasingMap.Count} entries.");
            if (categoryCasingMap.Count > 256)
            {
                throw new Exception("CategoryCasingMap exceeds max count of 256 entries!");
            }

            Console.WriteLine($"NumericGraphemeMap contains {numericGraphemeMap.Count} entries.");
            if (numericGraphemeMap.Count > 256)
            {
                throw new Exception("NumericGraphemeMap exceeds max count of 256 entries!");
            }

            Console.WriteLine();

            // Choose default ratios for the data tables we'll be generating.

            TableLevels categoryCasingTableLevelBits  = new TableLevels(5, 4);
            TableLevels numericGraphemeTableLevelBits = new TableLevels(5, 4);

            // Now generate the tables.

            categoryCasingTable.GenerateTable("CategoryCasingTable", categoryCasingTableLevelBits.Level2Bits, categoryCasingTableLevelBits.Level3Bits);
            numericGraphemeTable.GenerateTable("NumericGraphemeTable", numericGraphemeTableLevelBits.Level2Bits, numericGraphemeTableLevelBits.Level3Bits);

            // If you want to see if a different ratio would have better compression
            // statistics, uncomment the lines below and re-run the application.
            // categoryCasingTable.CalculateTableVariants();
            // numericGraphemeTable.CalculateTableVariants();

            // Now generate the C# source file.
            // All text is written with explicit "\n" line endings; they are converted to the
            // platform newline in a separate pass after the file is closed.

            using (StreamWriter file = File.CreateText(SOURCE_NAME))
            {
                // License header, usings, and the opening of the partial class.
                file.Write("// Licensed to the .NET Foundation under one or more agreements.\n");
                file.Write("// The .NET Foundation licenses this file to you under the MIT license.\n");

                file.Write("using System.Diagnostics;\n\n");

                file.Write("namespace System.Globalization\n");
                file.Write("{\n");
                file.Write("    public static partial class CharUnicodeInfo\n    {\n");

                file.Write("        // THE FOLLOWING DATA IS AUTO GENERATED BY GenUnicodeProp program UNDER THE TOOLS FOLDER\n");
                file.Write("        // PLEASE DON'T MODIFY BY HAND\n");
                file.Write("        // IF YOU NEED TO UPDATE UNICODE VERSION FOLLOW THE GUIDE AT src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md\n");

                // Category & casing section: index table first, then the value arrays
                // that are keyed by the indices in categoryCasingMap.
                PrintAssertTableLevelsBitCountRoutine("CategoryCasing", file, categoryCasingTableLevelBits);

                file.Write($"\n        // {categoryCasingTableLevelBits} index table of the Unicode category & casing data.");
                PrintSourceIndexArray("CategoryCasingLevel1Index", categoryCasingTable, file);

                file.Write("\n        // Contains Unicode category & bidi class information");
                PrintValueArray("CategoriesValues", categoryCasingMap, CategoryCasingInfo.ToCategoryBytes, file);

                if (IncludeCasingData)
                {
                    // Only write out the casing data if we have been asked to do so.

                    file.Write("\n        // Contains simple culture-invariant uppercase mappings");
                    PrintValueArray("UppercaseValues", categoryCasingMap, CategoryCasingInfo.ToUpperBytes, file);

                    file.Write("\n        // Contains simple culture-invariant lowercase mappings");
                    PrintValueArray("LowercaseValues", categoryCasingMap, CategoryCasingInfo.ToLowerBytes, file);

                    file.Write("\n        // Contains simple culture-invariant titlecase mappings");
                    PrintValueArray("TitlecaseValues", categoryCasingMap, CategoryCasingInfo.ToTitleBytes, file);

                    file.Write("\n        // Contains simple culture-invariant case fold mappings");
                    PrintValueArray("CaseFoldValues", categoryCasingMap, CategoryCasingInfo.ToCaseFoldBytes, file);
                }

                // Numeric & grapheme section: index table first, then the value arrays
                // that are keyed by the indices in numericGraphemeMap.
                PrintAssertTableLevelsBitCountRoutine("NumericGrapheme", file, numericGraphemeTableLevelBits);

                file.Write($"\n        // {numericGraphemeTableLevelBits} index table of the Unicode numeric & text segmentation data.");
                PrintSourceIndexArray("NumericGraphemeLevel1Index", numericGraphemeTable, file);

                file.Write("\n        // Contains decimal digit values in high nibble; digit values in low nibble");
                PrintValueArray("DigitValues", numericGraphemeMap, NumericGraphemeInfo.ToDigitBytes, file);

                file.Write("\n        // Contains numeric values");
                PrintValueArray("NumericValues", numericGraphemeMap, NumericGraphemeInfo.ToNumericBytes, file);

                file.Write("\n        // Contains grapheme cluster segmentation values");
                PrintValueArray("GraphemeSegmentationValues", numericGraphemeMap, NumericGraphemeInfo.ToGraphemeBytes, file);

                // Close the class and namespace.
                file.Write("\n    }\n}\n");
            }

            // Quick fixup: Replace \n with \r\n on Windows.
            // (The check is against Environment.NewLine, so the file is re-read and rewritten
            // on any platform whose newline is not "\n".)

            if (Environment.NewLine != "\n")
            {
                File.WriteAllText(SOURCE_NAME, File.ReadAllText(SOURCE_NAME).Replace("\n", Environment.NewLine));
            }

            Console.WriteLine("Completed!");
        }