/// <summary>
/// Parses <paramref name="source"/> as XML on a thread-pool thread and builds a
/// <see cref="UnicodeData"/> entry for every descendant element whose <c>class</c>
/// attribute starts with "u" (case-insensitive).
/// </summary>
/// <param name="source">XML text to parse.</param>
/// <returns>
/// An array with one slot per descendant element of the parsed root. Slots whose element
/// is not a character entry are left <c>null</c>, so callers must null-check each item.
/// </returns>
private async static Task<UnicodeData[]> ReadXml(string source)
{
    return await Task.Run(() =>
    {
        // Materialize once: Descendants() is lazy, so calling Count() and then
        // iterating would walk the whole tree twice.
        XElement[] elements = XElement.Parse(source).Descendants().ToArray();
        UnicodeData[] list = new UnicodeData[elements.Length];

        for (int j = 0; j < elements.Length; j++)
        {
            XElement element = elements[j];

            // Expected entry shape: <div class="u"><span>䷀</span><tt>4dc0</tt></div>
            XAttribute classAttribute = element.Attribute("class");
            if (classAttribute == null
                || !classAttribute.Value.StartsWith("u", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Other elements may also carry a class starting with "u"; skip anything
            // that does not have both children instead of failing on a null reference.
            XElement span = element.Element("span");
            XElement tt = element.Element("tt");
            if (span == null || tt == null)
            {
                continue;
            }

            UnicodeData entry = new UnicodeData()
            {
                Name = span.Value,
                IsLeaf = true,
                Code = tt.Value,
                Title = span.Value,
            };
            entry.DataCode = Convert.ToInt32("0x" + entry.Code, 16);
            list[j] = entry;
        }

        return list;
    });
}
/// <summary>
/// Loads the packaged Unicode data file into <c>UnicodeData</c> and then fills
/// <c>Fonts</c> with the loaded fonts ordered by name.
/// </summary>
static async Task _InitAsync()
{
    // Opening and parsing the data file is pushed onto the thread pool.
    UnicodeData = await Task.Run(async () =>
    {
        var dataUri = new Uri("ms-appx:///Data/ucd.all.optimized.xml");
        var dataFile = await StorageFile.GetFileFromApplicationUriAsync(dataUri);
        var randomAccess = await dataFile.OpenReadAsync();
        using (var input = randomAccess.AsStreamForRead())
        {
            return UnicodeData.Load(input);
        }
    });

    var loadedFonts = await FontData.LoadAsync();
    Fonts = loadedFonts.OrderBy(font => font.Name).ToList();

#if DEBUG
    // Disabled debug aid: re-saves the loaded data into the app's local folder.
    //var outputfolder = ApplicationData.Current.LocalFolder;
    //var outputfile = await outputfolder.CreateFileAsync("ucd.all.optimized.xml", CreationCollisionOption.ReplaceExisting);
    //var rasoutput = await outputfile.OpenAsync(FileAccessMode.ReadWrite);
    //await Task.Run(() =>
    //{
    //    using (var stream = rasoutput.AsStreamForWrite())
    //    {
    //        UnicodeData.Save(stream, UnicodeData);
    //    }
    //});
#endif
}
/// <summary>
/// Prints entries 31 through 80 of <paramref name="ud"/> (skip 30, take 50) to the console.
/// </summary>
/// <param name="ud">Data set whose entries are listed.</param>
private static void ReadUnicodeData(UnicodeData ud)
{
    var window = ud.GetEntries().Skip(30).Take(50);
    foreach (var entry in window)
    {
        Console.WriteLine(entry);
    }
}
/// <summary>
/// Click handler for a generated character button: appends the character described by
/// the button's <see cref="UnicodeData"/> tag to <c>textBox1</c>.
/// </summary>
/// <param name="sender">The clicked button; its <c>Tag</c> is expected to hold a <see cref="UnicodeData"/>.</param>
/// <param name="e">Unused.</param>
private void btnUnicode_Click(object sender, EventArgs e)
{
    Button butt = sender as Button;
    UnicodeData d = butt == null ? null : butt.Tag as UnicodeData;
    if (d == null)
    {
        // Not raised by one of the generated character buttons; nothing to append.
        return;
    }

    int c = Convert.ToInt32("0x" + d.Code, 16);

    // A single char holds only 16 bits, so code points above U+FFFF must be emitted
    // as a surrogate pair; a plain (char) cast would silently truncate them.
    string s = (c > char.MaxValue && c <= 0x10FFFF)
        ? char.ConvertFromUtf32(c)
        : ((char)c).ToString();

    textBox1.AppendText(s);
}
/// <summary>
/// Checks, for every rune returned by <c>AllRunes()</c>, that <c>Rune.IsWhiteSpace</c>
/// agrees with <c>UnicodeData.IsWhiteSpace</c>.
/// </summary>
public static void IsWhiteSpace_AllInputs()
{
    foreach (Rune candidate in AllRunes())
    {
        // Reference answer first, then the runtime's answer.
        var expected = UnicodeData.IsWhiteSpace(candidate.Value);
        var actual = Rune.IsWhiteSpace(candidate);

        Assert.Equal(expected, actual);
    }
}
/// <summary>
/// Compares <c>CharUnicodeInfo.GetUnicodeCategory(int)</c> against the reference data
/// for every code point from 0 through <c>HIGHEST_CODE_POINT</c>.
/// </summary>
public void GetUnicodeCategory_Int32()
{
    int codePoint = 0;
    while (codePoint <= HIGHEST_CODE_POINT)
    {
        CodePoint reference = UnicodeData.GetData(codePoint);
        UnicodeCategory reported = CharUnicodeInfo.GetUnicodeCategory(codePoint);

        AssertEqual(
            reference.GeneralCategory,
            reported,
            nameof(CharUnicodeInfo.GetUnicodeCategory),
            reference);

        codePoint++;
    }
}
/// <summary>
/// Prints the decomposition mapping of every entry in <paramref name="ud"/> that has a
/// non-empty one.
/// </summary>
/// <param name="ud">Data set whose entries are scanned.</param>
private static void DecompositionMap(UnicodeData ud)
{
    foreach (var entry in ud.GetEntries())
    {
        bool hasMapping = entry.DecompositionMapping.Length != 0;
        if (hasMapping)
        {
            Console.WriteLine(entry.DecompositionMapping);
        }
    }
}
/// <summary>
/// Compares <c>CharUnicodeInfo.GetNumericValue(char)</c> against the reference data for
/// every UTF-16 code unit.
/// </summary>
public void GetNumericValue_Char()
{
    int index = 0;
    do
    {
        char ch = (char)index;
        CodePoint reference = UnicodeData.GetData(ch);
        double reported = CharUnicodeInfo.GetNumericValue(ch);

        AssertEqual(
            reference.NumericValue,
            reported,
            nameof(CharUnicodeInfo.GetNumericValue),
            reference);

        index++;
    }
    while (index <= char.MaxValue);
}
/// <summary>
/// Compares <c>CharUnicodeInfo.GetUnicodeCategory(char)</c> against the reference data
/// for every UTF-16 code unit.
/// </summary>
public void GetUnicodeCategory_Char()
{
    const int last = char.MaxValue;

    for (int index = 0; index <= last; index++)
    {
        char ch = (char)index;

        CodePoint reference = UnicodeData.GetData(ch);
        UnicodeCategory reported = CharUnicodeInfo.GetUnicodeCategory(ch);

        AssertEqual(
            reference.GeneralCategory,
            reported,
            nameof(CharUnicodeInfo.GetUnicodeCategory),
            reference);
    }
}
/// <summary>
/// Calls <c>char.GetUnicodeCategory</c> for every possible <c>char</c> and checks the
/// result against the core Unicode data, with a few hard-coded exceptions.
/// </summary>
public static void GetUnicodeCategory_Char_AllInputs()
{
    for (uint i = 0; i <= char.MaxValue; i++)
    {
        // The code points listed explicitly below changed category between versions of
        // the Unicode specification. For compatibility, Char keeps its own copy of the
        // categories for the first 256 code points, locked to an earlier version of the
        // standard. For an example of a code point that switched categories, see the
        // discussion of U+00AD SOFT HYPHEN at
        // https://www.unicode.org/versions/Unicode4.0.0/.
        UnicodeCategory expected;
        switch (i)
        {
            case '\u00a7':
            case '\u00b6':
                expected = UnicodeCategory.OtherSymbol;
                break;

            case '\u00aa':
            case '\u00ba':
                expected = UnicodeCategory.LowercaseLetter;
                break;

            case '\u00ad':
                expected = UnicodeCategory.DashPunctuation;
                break;

            default:
                expected = UnicodeData.GetUnicodeCategory(i);
                break;
        }

        UnicodeCategory actual = char.GetUnicodeCategory((char)i);
        if (actual == expected)
        {
            continue;
        }

        // Build the failure message by hand so it names the offending code point.
        throw new AssertActualExpectedException(
            expected: expected,
            actual: actual,
            userMessage: FormattableString.Invariant($@"char.GetUnicodeCategory('\u{i:X4}') returned wrong value."));
    }
}
/// <summary>
/// Round-trips every code point from 0 through 0x10FFFF through
/// <c>WriteCodePoint</c> and <c>UnicodeData.ReadCodePoint</c> using one shared stream.
/// </summary>
public void CodePointEncodingTest()
{
    const int lastCodePoint = 0x10FFFF;

    using (var buffer = new MemoryStream(4))
    {
        // Both wrappers leave the underlying stream open; the outer using owns it.
        using (var output = new BinaryWriter(buffer, Encoding.UTF8, true))
        {
            using (var input = new BinaryReader(buffer, Encoding.UTF8, true))
            {
                for (int codePoint = 0; codePoint <= lastCodePoint; ++codePoint)
                {
                    output.WriteCodePoint(codePoint);
                    output.Flush();

                    // Rewind to read back what was just written...
                    buffer.Position = 0;
                    Assert.Equal(codePoint, UnicodeData.ReadCodePoint(input));

                    // ...and rewind again so the next write starts at the beginning.
                    buffer.Position = 0;
                }
            }
        }
    }
}
/// <summary>
/// Calls <c>Rune.GetUnicodeCategory</c> for every rune returned by <c>AllRunes()</c> and
/// checks that the result matches the core Unicode data.
/// </summary>
public static void GetUnicodeCategory_AllInputs()
{
    foreach (Rune rune in AllRunes())
    {
        var expected = UnicodeData.GetUnicodeCategory(rune.Value);
        var actual = Rune.GetUnicodeCategory(rune);

        if (expected == actual)
        {
            continue;
        }

        // Build the failure message by hand so it names the offending code point.
        throw new AssertActualExpectedException(
            expected: expected,
            actual: actual,
            userMessage: FormattableString.Invariant($@"Rune.GetUnicodeCategory(U+{rune.Value:X4}) returned wrong value."));
    }
}
/// <summary>
/// Calls <c>char.IsLetter</c> for every possible <c>char</c> and checks that the result
/// matches <c>UnicodeData.IsLetter</c>.
/// </summary>
public static void IsLetter_Char_AllInputs()
{
    for (uint i = 0; i <= char.MaxValue; i++)
    {
        char ch = (char)i;

        var expected = UnicodeData.IsLetter(ch);
        var actual = char.IsLetter(ch);

        if (expected == actual)
        {
            continue;
        }

        // Build the failure message by hand so it names the offending code point.
        throw new AssertActualExpectedException(
            expected: expected,
            actual: actual,
            userMessage: FormattableString.Invariant($@"char.IsLetter('\u{i:X4}') returned wrong value."));
    }
}
/// <summary>
/// Builds an action that repopulates <c>panel1</c> with one button per non-null entry of
/// <paramref name="array1D"/>, laid out in rows of 17, and then assigns the tab order of
/// <c>btnClear</c>, <c>btnDel</c> and <c>textBox1</c> via <c>SetTabIndex</c>.
/// </summary>
/// <param name="array1D">
/// Entries to show. Null slots are skipped; a null array is treated as empty.
/// </param>
/// <returns>The action to run; nothing is changed until it is invoked.</returns>
Action CreatButtons(UnicodeData[] array1D)
{
    const int buttonsPerRow = 17;
    const int cellSize = 40;
    const int margin = 3;

    return () =>
    {
        UnicodeData[] items = array1D ?? new UnicodeData[0];

        // Controls.Clear() only detaches controls. Dispose the character buttons made by
        // a previous run (identified by their UnicodeData tag) so their window handles
        // and Click subscriptions are released instead of piling up.
        Control[] previous = new Control[this.panel1.Controls.Count];
        this.panel1.Controls.CopyTo(previous, 0);
        this.panel1.Controls.Clear();
        foreach (Control old in previous)
        {
            if (old is Button && old.Tag is UnicodeData)
            {
                old.Dispose();
            }
        }

        int tabIndex = 0;
        int column = 0;
        int row = 0;

        for (int l = 0; l < items.Length; l++)
        {
            UnicodeData d = items[l];
            if (d == null)
            {
                // Sparse input: slots without data produce no button.
                continue;
            }

            Button butt = new Button();
            butt.Location = new System.Drawing.Point(margin + column * cellSize, margin + row * cellSize);
            // Unique per button; previously every button reused the name "btnClear".
            butt.Name = "btnUnicode" + l;
            butt.Size = new System.Drawing.Size(32, 27);
            butt.TabIndex = l;
            butt.Text = d.Name;
            butt.Tag = d;
            butt.UseVisualStyleBackColor = true;
            butt.Click += new System.EventHandler(this.btnUnicode_Click);

            if (d.IsLeaf)
            {
                this.toolTip1.SetToolTip(butt, d.Title);
            }
            else
            {
                butt.Enabled = false;
            }

            this.panel1.Controls.Add(butt);

            column++;
            if (column == buttonsPerRow)
            {
                column = 0;
                row++;
            }

            tabIndex = butt.TabIndex;
        }

        SetTabIndex(tabIndex, new Control[] { btnClear, btnDel, textBox1 });
    };
}
/// <summary>
/// Replaces the text of <c>textBox1</c> with one line per enabled character button in
/// <c>panel1</c>, each line holding the character for that button's
/// <see cref="UnicodeData"/> tag.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private void btnPut_Click(object sender, EventArgs e)
{
    if (cboUnicodeRange.SelectedValue == null)
    {
        cboUnicodeRange.SelectedValue = "cjk-unified-ideographs";
    }

    StringBuilder sbText = new StringBuilder();
    foreach (var item in this.panel1.Controls)
    {
        Button butt = item as Button;
        if (butt == null || !butt.Enabled)
        {
            continue;
        }

        UnicodeData d = butt.Tag as UnicodeData;
        if (d == null)
        {
            // A button without character data attached; nothing to emit for it.
            continue;
        }

        int c = d.DataCode;

        // Code points above U+FFFF need a surrogate pair; a plain (char) cast would
        // truncate them to an unrelated character.
        string s = (c > char.MaxValue && c <= 0x10FFFF)
            ? char.ConvertFromUtf32(c)
            : ((char)c).ToString();

        sbText.AppendLine(s);
    }

    textBox1.Text = sbText.ToString();
}
/// <summary>
/// Calls <c>char.IsLower</c> for every possible <c>char</c> and checks the result against
/// the core Unicode data, with two hard-coded exceptions.
/// </summary>
public static void IsLower_Char_AllInputs()
{
    for (uint i = 0; i <= char.MaxValue; i++)
    {
        bool expected;

        // In Unicode 6.1 the code points U+00AA (FEMININE ORDINAL INDICATOR) and U+00BA
        // (MASCULINE ORDINAL INDICATOR) were reassigned from category Ll to category Lo.
        // For compatibility, Char uses the older version of the standard for code points
        // in the range U+0000..U+00FF, so both are special-cased here.
        // More info: https://www.unicode.org/review/pri181/
        if (i == '\u00AA' || i == '\u00BA')
        {
            expected = true;
        }
        else
        {
            expected = UnicodeData.GetUnicodeCategory((char)i) == UnicodeCategory.LowercaseLetter;
        }

        bool actual = char.IsLower((char)i);
        if (actual == expected)
        {
            continue;
        }

        // Build the failure message by hand so it names the offending code point.
        throw new AssertActualExpectedException(
            expected: expected,
            actual: actual,
            userMessage: FormattableString.Invariant($@"char.IsLower('\u{i:X4}') returned wrong value."));
    }
}
/// <summary>
/// Sends the characters of the leading entries of <paramref name="array1D"/> to
/// <c>AcF1.GetSingle</c>, one character per line, and concatenates the responses.
/// </summary>
/// <param name="array1D">Entries to translate; null slots are skipped.</param>
/// <returns>
/// The concatenated responses, or an empty string when there is nothing to send.
/// </returns>
private async Task<string> GETTranslateAPI(UnicodeData[] array1D)
{
    // NOTE(review): the hard cap of 3 entries looks like a leftover debugging limit.
    // It is kept to preserve behavior — confirm whether the full array should be sent.
    const int maxItems = 3;
    const int batchSize = 500;

    if (array1D == null)
    {
        return string.Empty;
    }

    // Clamp to the array length: the unconditional "3" indexed past the end of
    // arrays with fewer than three entries.
    int iLen = Math.Min(array1D.Length, maxItems);

    StringBuilder sb = new StringBuilder();
    StringBuilder sb1 = new StringBuilder();

    for (int i = 0; i < iLen; i++)
    {
        UnicodeData d = array1D[i];
        if (d != null)
        {
            int c = d.DataCode;

            // Code points above U+FFFF need a surrogate pair; a plain (char) cast
            // would truncate them.
            string s = (c > char.MaxValue && c <= 0x10FFFF)
                ? char.ConvertFromUtf32(c)
                : ((char)c).ToString();

            sb.AppendLine(s);
        }

        // Flush on every batch boundary and after the final entry. The length check
        // avoids sending an empty request when the pending entries were all null.
        bool atFlushPoint = i % batchSize == 0 || i == iLen - 1;
        if (atFlushPoint && sb.Length > 0)
        {
            sb1.Append(await AcF1.GetSingle(sb.ToString()));
            sb.Clear();
        }
    }

    return sb1.ToString();
}
/// <summary>
/// When the selected Unicode range changes, fetches that range's page via <c>Code</c>,
/// parses it with <c>ReadXml</c>, and shows the characters in <c>textBox1</c>, one per line.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private async void cboUnicodeRange_SelectedIndexChanged(object sender, EventArgs e)
{
    UniCodeRange range = cboUnicodeRange.SelectedItem as UniCodeRange;
    if (range == null)
    {
        // Selection cleared, or the item is not a range: nothing to load.
        return;
    }

    string source = await Code(range.Href);
    UnicodeData[] us = await ReadXml(source);

    // The awaits above yield to the UI; if the user picked another range meanwhile,
    // drop this now-stale result instead of overwriting the newer one.
    if (!ReferenceEquals(cboUnicodeRange.SelectedItem, range))
    {
        return;
    }

    StringBuilder sbText = new StringBuilder();
    foreach (UnicodeData item in us)
    {
        if (item == null)
        {
            // The parsed array is sparse; empty slots carry no character.
            continue;
        }

        int c = item.DataCode;

        // Code points above U+FFFF need a surrogate pair; a plain (char) cast would
        // truncate them.
        string s = (c > char.MaxValue && c <= 0x10FFFF)
            ? char.ConvertFromUtf32(c)
            : ((char)c).ToString();

        sbText.AppendLine(s);
    }

    textBox1.Text = sbText.ToString();
}
/// <summary>
/// Entry point of the generator: reads the Unicode data, builds the category/casing and
/// numeric/grapheme lookup tables, and writes them to <c>SOURCE_NAME</c> as C# source.
/// </summary>
/// <param name="args">
/// Command-line switches, matched case-insensitively: <c>-Verbose</c> and
/// <c>-IncludeCasingData</c> (the latter also emits the casing tables).
/// </param>
private static void Main(string[] args)
{
    Verbose = args.Contains("-Verbose", StringComparer.OrdinalIgnoreCase);
    IncludeCasingData = args.Contains("-IncludeCasingData", StringComparer.OrdinalIgnoreCase);

    // Step 1: read the data files and build up the list of all assigned code points.
    Console.WriteLine("Reading Unicode data files...");
    _ = UnicodeData.GetData(0); // processes files
    Console.WriteLine("Finished.");
    Console.WriteLine();

    Console.WriteLine("Initializing maps...");
    var categoryCasingMap = new Dictionary<CategoryCasingInfo, byte>();
    var numericGraphemeMap = new Dictionary<NumericGraphemeInfo, byte>();

    // Step 2: iterate through all code points, populating the category casing and
    // numeric grapheme maps. The data also goes into the DataTable structures, which
    // compute the tiered offset tables.
    var categoryCasingTable = new DataTable();
    var numericGraphemeTable = new DataTable();

    for (int codePoint = 0; codePoint <= 0x10_FFFF; codePoint++)
    {
        CodePoint current = UnicodeData.GetData(codePoint);

        var casingKey = new CategoryCasingInfo(current);
        if (!categoryCasingMap.TryGetValue(casingKey, out byte casingIndex))
        {
            casingIndex = (byte)categoryCasingMap.Count;
            categoryCasingMap[casingKey] = casingIndex;
        }

        categoryCasingTable.AddData((uint)codePoint, casingIndex);

        var numericKey = new NumericGraphemeInfo(current);
        if (!numericGraphemeMap.TryGetValue(numericKey, out byte numericIndex))
        {
            numericIndex = (byte)numericGraphemeMap.Count;
            numericGraphemeMap[numericKey] = numericIndex;
        }

        numericGraphemeTable.AddData((uint)codePoint, numericIndex);
    }

    // Step 3: the map indices are stored as bytes, so verify that neither map overflowed.
    Console.WriteLine($"CategoryCasingMap contains {categoryCasingMap.Count} entries.");
    if (categoryCasingMap.Count > 256)
    {
        throw new Exception("CategoryCasingMap exceeds max count of 256 entries!");
    }

    Console.WriteLine($"NumericGraphemeMap contains {numericGraphemeMap.Count} entries.");
    if (numericGraphemeMap.Count > 256)
    {
        throw new Exception("NumericGraphemeMap exceeds max count of 256 entries!");
    }

    Console.WriteLine();

    // Step 4: choose the default level ratios and generate the tables.
    var categoryCasingTableLevelBits = new TableLevels(5, 4);
    var numericGraphemeTableLevelBits = new TableLevels(5, 4);

    categoryCasingTable.GenerateTable(
        "CategoryCasingTable",
        categoryCasingTableLevelBits.Level2Bits,
        categoryCasingTableLevelBits.Level3Bits);
    numericGraphemeTable.GenerateTable(
        "NumericGraphemeTable",
        numericGraphemeTableLevelBits.Level2Bits,
        numericGraphemeTableLevelBits.Level3Bits);

    // To see whether a different ratio would give better compression statistics,
    // uncomment the lines below and re-run the application.
    // categoryCasingTable.CalculateTableVariants();
    // numericGraphemeTable.CalculateTableVariants();

    // Step 5: emit the C# source file. The block-scoped using closes the file before
    // the newline fixup below re-reads it.
    using (StreamWriter output = File.CreateText(SOURCE_NAME))
    {
        output.Write("// Licensed to the .NET Foundation under one or more agreements.\n");
        output.Write("// The .NET Foundation licenses this file to you under the MIT license.\n");
        output.Write("using System.Diagnostics;\n\n");
        output.Write("namespace System.Globalization\n");
        output.Write("{\n");
        output.Write(" public static partial class CharUnicodeInfo\n {\n");
        output.Write(" // THE FOLLOWING DATA IS AUTO GENERATED BY GenUnicodeProp program UNDER THE TOOLS FOLDER\n");
        output.Write(" // PLEASE DON'T MODIFY BY HAND\n");
        output.Write(" // IF YOU NEED TO UPDATE UNICODE VERSION FOLLOW THE GUIDE AT src/libraries/System.Private.CoreLib/Tools/GenUnicodeProp/Updating-Unicode-Versions.md\n");

        PrintAssertTableLevelsBitCountRoutine("CategoryCasing", output, categoryCasingTableLevelBits);

        output.Write($"\n // {categoryCasingTableLevelBits} index table of the Unicode category & casing data.");
        PrintSourceIndexArray("CategoryCasingLevel1Index", categoryCasingTable, output);

        output.Write("\n // Contains Unicode category & bidi class information");
        PrintValueArray("CategoriesValues", categoryCasingMap, CategoryCasingInfo.ToCategoryBytes, output);

        // The casing tables are written only on request.
        if (IncludeCasingData)
        {
            output.Write("\n // Contains simple culture-invariant uppercase mappings");
            PrintValueArray("UppercaseValues", categoryCasingMap, CategoryCasingInfo.ToUpperBytes, output);

            output.Write("\n // Contains simple culture-invariant lowercase mappings");
            PrintValueArray("LowercaseValues", categoryCasingMap, CategoryCasingInfo.ToLowerBytes, output);

            output.Write("\n // Contains simple culture-invariant titlecase mappings");
            PrintValueArray("TitlecaseValues", categoryCasingMap, CategoryCasingInfo.ToTitleBytes, output);

            output.Write("\n // Contains simple culture-invariant case fold mappings");
            PrintValueArray("CaseFoldValues", categoryCasingMap, CategoryCasingInfo.ToCaseFoldBytes, output);
        }

        PrintAssertTableLevelsBitCountRoutine("NumericGrapheme", output, numericGraphemeTableLevelBits);

        output.Write($"\n // {numericGraphemeTableLevelBits} index table of the Unicode numeric & text segmentation data.");
        PrintSourceIndexArray("NumericGraphemeLevel1Index", numericGraphemeTable, output);

        output.Write("\n // Contains decimal digit values in high nibble; digit values in low nibble");
        PrintValueArray("DigitValues", numericGraphemeMap, NumericGraphemeInfo.ToDigitBytes, output);

        output.Write("\n // Contains numeric values");
        PrintValueArray("NumericValues", numericGraphemeMap, NumericGraphemeInfo.ToNumericBytes, output);

        output.Write("\n // Contains grapheme cluster segmentation values");
        PrintValueArray("GraphemeSegmentationValues", numericGraphemeMap, NumericGraphemeInfo.ToGraphemeBytes, output);

        output.Write("\n }\n}\n");
    }

    // Quick fixup: replace \n with the platform newline where the two differ.
    if (Environment.NewLine != "\n")
    {
        string generated = File.ReadAllText(SOURCE_NAME);
        File.WriteAllText(SOURCE_NAME, generated.Replace("\n", Environment.NewLine));
    }

    Console.WriteLine("Completed!");
}