/// <summary>
/// Demonstrates the difference between iterating a string by UTF-16 <c>char</c>
/// and iterating it by text element (base character plus its combining marks).
/// Writes the two lengths to the console and shows both traversals in a message box.
/// </summary>
static void UseStringInfo()
{
    // U+0041 'A' + U+030A (combining ring above), then U+0625 (Arabic alef with
    // hamza below) + U+0650 (Arabic kasra): two base characters, each followed
    // by a combining mark.
    string s = new string(new char[] { '\u0041', '\u030A', '\u0625', '\u0650' });

    // Put a plain '+' between the two combined sequences.
    s = s.Insert(2, "+");

    System.Globalization.StringInfo info = new System.Globalization.StringInfo(s);

    // Length counts UTF-16 code units; LengthInTextElements counts text elements.
    Console.WriteLine(s.Length + " | " + info.LengthInTextElements);

    string newLine = System.Environment.NewLine;

    // Accumulate in a StringBuilder instead of repeated string concatenation,
    // which would re-copy the whole text on every loop iteration.
    System.Text.StringBuilder output = new System.Text.StringBuilder();

    output.Append("Traverse string by Text Element: ").Append(newLine);

    // The enumerator starts positioned before the first element, so MoveNext()
    // must be called before the first element can be read.
    System.Globalization.TextElementEnumerator enumerator =
        System.Globalization.StringInfo.GetTextElementEnumerator(s);
    while (enumerator.MoveNext())
    {
        output.Append(enumerator.GetTextElement()).Append(newLine);
    }

    output.Append("Traverse string by Character").Append(newLine);
    foreach (char c in s)
    {
        output.Append(c).Append(newLine);
    }

    output.Append("Whole string is represented as").Append(newLine);
    output.Append(s);

    MessageBox.Show(output.ToString());
}
/// <summary>
/// Splits a string into its text elements, as reported by
/// <c>System.Globalization.StringInfo.GetTextElementEnumerator</c>.
/// </summary>
/// <param name="s">The string to split.</param>
/// <returns>A new list holding each text element of <paramref name="s"/>, in order.</returns>
private static System.Collections.Generic.List<string> GraphemeClusters(string s)
{
    var clusters = new System.Collections.Generic.List<string>();
    var elements = System.Globalization.StringInfo.GetTextElementEnumerator(s);

    // The enumerator begins before the first element; each MoveNext() advances by one.
    while (elements.MoveNext())
    {
        string cluster = elements.GetTextElement();
        clusters.Add(cluster);
    }

    return clusters;
}
// Author's note: use Rune instead of this once we're able to be on .NET Core 3.1 and above.
// NOTE(review): Rune represents a single Unicode scalar value, not a text element;
// confirm that is the intended replacement before switching.
/// <summary>
/// Returns the text elements of <paramref name="input"/> as a list of strings.
/// </summary>
/// <param name="input">The string to break into text elements.</param>
/// <returns>A new list with one entry per text element, in their original order.</returns>
public static List<string> TextElements(this string input)
{
    List<string> elements = new List<string>();

    // MoveNext() doubles as the loop condition: it advances the enumerator and
    // reports whether an element is available.
    for (System.Globalization.TextElementEnumerator cursor =
             System.Globalization.StringInfo.GetTextElementEnumerator(input);
         cursor.MoveNext();)
    {
        elements.Add(cursor.GetTextElement());
    }

    return elements;
}
/// <summary>
/// Breaks <paramref name="inputText"/> into text elements so that multi-char
/// sequences (for example surrogate pairs) stay together in one list entry.
/// </summary>
/// <param name="inputText">The text to split.</param>
/// <returns>A new list containing the text elements of <paramref name="inputText"/>, in order.</returns>
public static List<string> SplitEmojiString(string inputText)
{
    List<string> pieces = new List<string>();
    System.Globalization.TextElementEnumerator walker =
        System.Globalization.StringInfo.GetTextElementEnumerator(inputText);

    // Advance first: the enumerator has no current element until MoveNext() succeeds.
    bool hasElement = walker.MoveNext();
    while (hasElement)
    {
        pieces.Add(walker.GetTextElement());
        hasElement = walker.MoveNext();
    }

    return pieces;
}
/// <summary>
/// Converts a small code page 737 ((DOS) Greek) file to Unicode, then prints the
/// original byte values and the resulting code points to the console.
/// </summary>
static void Main()
{
    const string codePagePath = "greek.txt";
    const string unicodePath = "greek_unicode.txt";

    // Create a file that contains the Greek word ψυχή (psyche) when interpreted by
    // using code page 737 ((DOS) Greek). You can also create the file by using
    // Character Map to paste the characters into Microsoft Word and then "Save As"
    // by using the DOS (Greek) encoding. (Word will actually create a six-byte file
    // by appending "\r\n" at the end.)
    // NOTE(review): 0x9E looks like unaccented eta in code page 737, so the decoded
    // text may be "ψυχη" rather than "ψυχή" - confirm against the code page table.
    System.IO.File.WriteAllBytes(codePagePath, new byte[] { 0xAF, 0xAC, 0xAE, 0x9E });

    // Pick the code page explicitly so the byte values are interpreted correctly.
    // NOTE(review): on .NET Core / .NET 5+ this code page presumably needs the
    // code-pages encoding provider registered first - confirm for the target runtime.
    Encoding greekDos = Encoding.GetEncoding(737);
    byte[] rawBytes = System.IO.File.ReadAllBytes(codePagePath);

    // Decoding produces an ordinary (UTF-16) .NET string with the same content.
    string decoded = greekDos.GetString(rawBytes);

    // Show that the text content is still intact in the Unicode string.
    // (Add a reference to System.Windows.Forms.dll)
    System.Windows.Forms.MessageBox.Show(decoded);

    // WriteAllText without an explicit encoding stores the same content as UTF-8.
    System.IO.File.WriteAllText(unicodePath, decoded);

    // Conversion is complete. Show the bytes to prove the conversion.
    Console.WriteLine("8-bit encoding byte values:");
    for (int index = 0; index < rawBytes.Length; index++)
    {
        Console.Write("{0:X}-", rawBytes[index]);
    }
    Console.WriteLine();

    Console.WriteLine("Unicode values:");
    string roundTripped = System.IO.File.ReadAllText(unicodePath);
    System.Globalization.TextElementEnumerator elements =
        System.Globalization.StringInfo.GetTextElementEnumerator(roundTripped);
    while (elements.MoveNext())
    {
        // Prints the code point that starts each text element.
        int codePoint = Char.ConvertToUtf32(elements.GetTextElement(), 0);
        Console.Write("{0:X}-", codePoint);
    }
    Console.WriteLine();

    // Keep the console window open in debug mode.
    Console.Write("Press any key to exit.");
    Console.ReadKey();
}