Beispiel #1
0
        /// <summary>
        /// Demonstrates the difference between walking a string by UTF-16 <c>char</c>
        /// and walking it by text element (as reported by
        /// <see cref="System.Globalization.StringInfo"/>).
        /// Writes "char count | text element count" to the console and shows the
        /// two traversals, followed by the whole string, in a message box.
        /// </summary>
        static void UseStringInfo()
        {
            // U+0041 (Latin 'A') + U+030A (combining ring above), then
            // U+0625 (Arabic alef with hamza below) + U+0650 (Arabic kasra):
            // four chars that combine into fewer user-perceived characters.
            string s = new string(new char[] { '\u0041', '\u030A', '\u0625', '\u0650' });

            // Insert a plain '+' between the two combined sequences.
            s = s.Insert(2, "+");
            System.Globalization.StringInfo info = new System.Globalization.StringInfo(s);
            Console.WriteLine(s.Length + " | " + info.LengthInTextElements);

            System.Globalization.TextElementEnumerator enumerator = System.Globalization.StringInfo.GetTextElementEnumerator(s);

            // Accumulate the report in a StringBuilder instead of re-allocating
            // a new string on every loop iteration.
            System.Text.StringBuilder output = new System.Text.StringBuilder();
            output.AppendLine("Traverse string by Text Element: ");

            // The enumerator starts positioned before the first element, so
            // MoveNext must be called before the first read.
            while (enumerator.MoveNext())
            {
                output.AppendLine(enumerator.GetTextElement());
            }

            output.AppendLine("Traverse string by Character");
            foreach (char c in s)
            {
                output.Append(c).AppendLine();
            }

            output.AppendLine("Whole string is represented as");
            output.Append(s);
            MessageBox.Show(output.ToString());
        }
Beispiel #2
0
 /// <summary>
 /// Splits <paramref name="s"/> into its text elements, in order, using
 /// <see cref="System.Globalization.StringInfo.GetTextElementEnumerator(string)"/>.
 /// </summary>
 /// <param name="s">The string to split.</param>
 /// <returns>A new list holding one string per text element.</returns>
 private static System.Collections.Generic.List <string> GraphemeClusters(string s)
 {
     var clusters = new System.Collections.Generic.List<string>();

     // The enumerator starts before the first element; the loop condition advances it.
     for (var walker = System.Globalization.StringInfo.GetTextElementEnumerator(s); walker.MoveNext();)
     {
         clusters.Add(walker.GetTextElement());
     }

     return clusters;
 }
Beispiel #3
0
        /// <summary>
        /// Extension method that breaks <paramref name="input"/> into its text
        /// elements as reported by <see cref="System.Globalization.StringInfo"/>.
        /// </summary>
        /// <param name="input">The string to split.</param>
        /// <returns>A new list with one entry per text element, in order.</returns>
        // TODO (carried over): use Rune instead of this once we're able to be on .NET Core 3.1 and above.
        // NOTE(review): Rune models a single Unicode scalar value, not a multi-code-point
        // text element — confirm it is really a drop-in replacement before switching.
        public static List <string> TextElements(this string input)
        {
            var cursor = System.Globalization.StringInfo.GetTextElementEnumerator(input);
            var elements = new List<string>();

            while (cursor.MoveNext())
            {
                // Current is the same text element GetTextElement() would return, typed as object.
                elements.Add((string)cursor.Current);
            }

            return elements;
        }
Beispiel #4
0
        /// <summary>
        /// Splits <paramref name="inputText"/> into a list of text elements using
        /// <see cref="System.Globalization.StringInfo.GetTextElementEnumerator(string)"/>,
        /// so that characters made of several UTF-16 chars (e.g. surrogate pairs)
        /// stay together in one list entry.
        /// </summary>
        /// <param name="inputText">The text to split.</param>
        /// <returns>A new list with one string per text element, in order.</returns>
        public static List <string> SplitEmojiString(string inputText)
        {
            var pieces = new List<string>();

            for (var it = System.Globalization.StringInfo.GetTextElementEnumerator(inputText); it.MoveNext();)
            {
                string piece = it.GetTextElement();
                pieces.Add(piece);
            }

            return pieces;
        }
Beispiel #5
0
        /// <summary>
        /// Sample: converts text stored in an 8-bit code page (737, DOS Greek)
        /// to a .NET string, writes it back out with File.WriteAllText, and
        /// prints both the original byte values and the resulting code points.
        /// </summary>
        static void Main()
        {
            const string dosFile = "greek.txt";
            const string unicodeFile = "greek_unicode.txt";

            // Create the input file. Per the original sample, these four bytes are the
            // Greek word ψυχή (psyche) when interpreted with code page 737 ((DOS) Greek).
            // The same file can be produced by pasting the characters from Character Map
            // into Microsoft Word and using "Save As" with the DOS (Greek) encoding
            // (Word will actually create a six-byte file by appending "\r\n" at the end).
            System.IO.File.WriteAllBytes(dosFile, new byte[] { 0xAF, 0xAC, 0xAE, 0x9E });

            // The code page must be named explicitly so the byte values are interpreted correctly.
            // NOTE(review): on .NET Core / .NET 5+ code page 737 is only available after
            // registering CodePagesEncodingProvider — confirm the target framework.
            Encoding dosGreek = Encoding.GetEncoding(737);

            byte[] rawBytes = System.IO.File.ReadAllBytes(dosFile);

            // Decoding yields an ordinary (UTF-16) .NET string.
            string decoded = dosGreek.GetString(rawBytes);

            // Show that the text survived the conversion
            // (requires a reference to System.Windows.Forms.dll).
            System.Windows.Forms.MessageBox.Show(decoded);

            // WriteAllText without an explicit encoding stores the text as UTF-8.
            System.IO.File.WriteAllText(unicodeFile, decoded);

            // Conversion is complete; dump the bytes to prove it.
            Console.WriteLine("8-bit encoding byte values:");
            for (int index = 0; index < rawBytes.Length; index++)
            {
                Console.Write("{0:X}-", rawBytes[index]);
            }

            Console.WriteLine();
            Console.WriteLine("Unicode values:");
            string roundTripped = System.IO.File.ReadAllText(unicodeFile);

            // Walk the text element by element and print the code point that starts each one.
            var elements = System.Globalization.StringInfo.GetTextElementEnumerator(roundTripped);
            while (elements.MoveNext())
            {
                int codePoint = char.ConvertToUtf32(elements.GetTextElement(), 0);
                Console.Write("{0:X}-", codePoint);
            }
            Console.WriteLine();

            // Keep the console window open in debug mode.
            Console.Write("Press any key to exit.");
            Console.ReadKey();
        }