/// <summary>
/// Touches the UTF-8 and UTF-16 string wrappers once on a tiny input, then prints a
/// label and runs <c>Check(string)</c> for each of the sample texts below.
/// </summary>
public static void Check()
{
    var warmUpText = "a";
    var utf8 = Encoding.UTF8.GetBytes(warmUpText);
#if !ArrayImplementation
    var utf16 = Encoding.Unicode.GetBytes(warmUpText);
#else
    var utf16 = Copy8To16(Encoding.Unicode.GetBytes(warmUpText));
#endif

    // Access everything once up front so that JIT time is not counted in the measurements.
    var warm8 = new Utf8String(utf8);
    var warm8Indexes = warm8.Indexes;
    var warm8First = warm8[warm8Indexes.First()];

    var warm16 = new Utf16String(utf16);
    var warm16Indexes = warm16.Indexes;
    var warm16First = warm16[warm16Indexes.First()];

    // Each entry is (label printed to the console, text handed to Check(string)).
    var samples = new (string label, string text)[]
    {
        ("絵文字あり", "ASCII: abcABC, Latin-1: ÀÁÂÃÄÅ, ελληνικά кириллица עִברִית ひらがな 한글 漢字, combining: áあ゙, emoji: 👩👩🏽👨👨👨👨👨👨👨👨🏻👩🏿👦🏽👦🏼"),
        ("絵文字なし", "ASCII: abcABC, Latin-1: ÀÁÂÃÄÅ, ελληνικά кириллица עִברִית ひらがな 한글 漢字, combining: áあ゙"),
        ("日本語", "寿限無、寿限無 五劫の擦り切れ 海砂利水魚の 水行末 雲来末 風来末 食う寝る処に住む処 藪ら柑子の藪柑子 パイポパイポパイポのシューリンガン シューリンガンのグーリンダイ グーリンダイのポンポコピーのポンポコナーの 長久命の長助"),
        ("絵文字のみ", "👩👩🏽👨👨👨👨👨👨👨👨🏻👩🏿👦🏽👦🏼🐀🐁🐂🐃🐄🐅🐆🐇🐈🐉🐊🐋🐌🐍🐎🐏🐐🐑🐒🐓🐔🐕🐖🐗🐘🐙🐚🐛🐜🐝🐞🐟🐠🐡🐢🐣🐤🐥🐦🐧🐨🐩🐪🐫🐬"),
        ("latin-1", "!\"#$%&'() 1234567890 AQWSEDRFTGYHUJIKOLP+@,./\\<>?_°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"),
        ("ASCII", "!\"#$%&'() 1234567890 AQWSEDRFTGYHUJIKOLP+@,./\\<>?_"),
    };

    foreach (var (label, text) in samples)
    {
        Console.WriteLine(label);
        Check(text);
    }
}
/// <summary>
/// Runs three rounds of timed loops over a lengthened copy of <paramref name="s"/>:
/// <c>foreach</c> over <c>LabUtf8String</c>, <c>Utf8String</c> and <c>Utf16String</c>,
/// followed by <c>Indexes</c> + indexer access on <c>Utf8String</c> and <c>Utf16String</c>.
/// </summary>
/// <param name="s">Sample text; it is doubled ten times before being encoded.</param>
static void Check(string s)
{
    const int Iterations = 100;

    // We want a fairly long string for measuring, so join it to 2^10 times its length.
    for (var doubling = 0; doubling < 10; doubling++)
    {
        s += s;
    }

    var utf8 = Encoding.UTF8.GetBytes(s);
#if !ArrayImplementation
    var utf16 = Encoding.Unicode.GetBytes(s);
#else
    var utf16 = Copy8To16(Encoding.Unicode.GetBytes(s));
#endif

    GC.Collect();

    for (var round = 0; round < 3; round++)
    {
        Console.WriteLine($"---- {round} ----");

        // NOTE(review): SW.New presumably starts a timer that reports under the given
        // label when disposed - confirm against the SW helper.
        using (SW.New("corefxlab code point: "))
        {
            for (var pass = 0; pass < Iterations; pass++)
            {
                foreach (var codePoint in new LabUtf8String(utf8))
                {
                }
            }
        }

        using (SW.New(" utf-8 code point: "))
        {
            for (var pass = 0; pass < Iterations; pass++)
            {
                foreach (var codePoint in new Utf8String(utf8))
                {
                }
            }
        }

        using (SW.New(" utf-16 code point: "))
        {
            for (var pass = 0; pass < Iterations; pass++)
            {
                foreach (var codePoint in new Utf16String(utf16))
                {
                }
            }
        }

        using (SW.New(" utf-8 index : "))
        {
            for (var pass = 0; pass < Iterations; pass++)
            {
                var text = new Utf8String(utf8);
                foreach (var position in text.Indexes)
                {
                    // The value is unused; the indexer call itself is what gets measured.
                    var element = text[position];
                }
            }
        }

        using (SW.New(" utf-16 index : "))
        {
            for (var pass = 0; pass < Iterations; pass++)
            {
                var text = new Utf16String(utf16);
                foreach (var position in text.Indexes)
                {
                    // The value is unused; the indexer call itself is what gets measured.
                    var element = text[position];
                }
            }
        }
    }
}
/// <summary>
/// Prints <c>GC.GetTotalMemory</c> before and after two counting loops run over the
/// same encoded bytes: one enumerating <c>MyString</c>, the other enumerating a
/// <see cref="string"/> decoded from the bytes on every iteration. For each loop it
/// prints the byte difference, the difference per iteration, and the final tallies.
/// </summary>
public static void AllocationCheck()
{
    const int Rounds = 10000;
    var str = "aáαℵあáあ゙亜👩👩🏽👨🏻👩🏿👦🏽👦🏼";
#if USE_UTF8
    var encoding = System.Text.Encoding.UTF8;
#else
    var encoding = System.Text.Encoding.Unicode;
#endif
    var data = encoding.GetBytes(str);

    Console.WriteLine(data.Length);
    Console.WriteLine(str);
    Console.WriteLine(str.Length);

    GC.Collect();
    {
        Console.WriteLine("--------");
        var before = GC.GetTotalMemory(false);
        Console.WriteLine(before);

        var tally = (total: 0, ascii: 0, latin1: 0, utf16: 0, surrogatePair: 0);
        for (var round = 0; round < Rounds; round++)
        {
            var text = new MyString(data);
            tally = (0, 0, 0, 0, 0);
            foreach (var element in text)
            {
                tally.total++;

                // Bucket by numeric value: below 0x80, below 0x100, below 0x10000, or above.
                var value = element.Value;
                if (value < 0x80)
                {
                    tally.ascii++;
                }
                else if (value < 0x100)
                {
                    tally.latin1++;
                }
                else if (value < 0x10000)
                {
                    tally.utf16++;
                }
                else
                {
                    tally.surrogatePair++;
                }
            }
        }

        var after = GC.GetTotalMemory(false);
        Console.WriteLine(after);
        var delta = after - before;
        Console.WriteLine($"{delta} {delta / Rounds}");
        Console.WriteLine("\t" + tally);
    }

    GC.Collect();
    {
        Console.WriteLine("--------");
        var before = GC.GetTotalMemory(false);
        Console.WriteLine(before);

        var tally = (total: 0, ascii: 0, latin1: 0, utf16: 0, surrogatePair: 0);
        for (var round = 0; round < Rounds; round++)
        {
            var text = encoding.GetString(data);
            tally = (0, 0, 0, 0, 0);
            foreach (var ch in text)
            {
                // A low surrogate is the second half of a pair: it contributes to no counter.
                if (char.IsLowSurrogate(ch))
                {
                    continue;
                }

                tally.total++;
                if (ch < 0x80)
                {
                    tally.ascii++;
                }
                else if (ch < 0x100)
                {
                    tally.latin1++;
                }
                else if (char.IsHighSurrogate(ch))
                {
                    tally.surrogatePair++;
                }
                else
                {
                    tally.utf16++;
                }
            }
        }

        var after = GC.GetTotalMemory(false);
        Console.WriteLine(after);
        var delta = after - before;
        Console.WriteLine($"{delta} {delta / Rounds}");
        Console.WriteLine("\t" + tally);
    }
}
/// <summary>
/// Compares managed-heap growth (as reported by <c>GC.GetTotalMemory</c>) of iterating
/// <c>MyString</c> against iterating a <see cref="string"/> re-decoded from the same
/// bytes each time, printing the raw readings, the difference, the per-iteration
/// difference and the character-class counts for each variant.
/// </summary>
public static void AllocationCheck()
{
    const int Repeat = 10000;
    var str = "aáαℵあáあ゙亜👩👩🏽👨🏻👩🏿👦🏽👦🏼";
#if USE_UTF8
    var encoding = System.Text.Encoding.UTF8;
#else
    var encoding = System.Text.Encoding.Unicode;
#endif
    var data = encoding.GetBytes(str);

    Console.WriteLine(data.Length);
    Console.WriteLine(str);
    Console.WriteLine(str.Length);

    // ---- Variant 1: MyString over the encoded bytes ----
    GC.Collect();
    Console.WriteLine("--------");
    var customStart = GC.GetTotalMemory(false);
    Console.WriteLine(customStart);

    (int total, int ascii, int latin1, int utf16, int surrogatePair) customCounts = (0, 0, 0, 0, 0);
    for (var iteration = 0; iteration < Repeat; iteration++)
    {
        var wrapped = new MyString(data);
        customCounts = (0, 0, 0, 0, 0);
        foreach (var item in wrapped)
        {
            customCounts.total++;
            if (item.Value < 0x80)
            {
                customCounts.ascii++;
            }
            else if (item.Value < 0x100)
            {
                customCounts.latin1++;
            }
            else if (item.Value < 0x10000)
            {
                customCounts.utf16++;
            }
            else
            {
                customCounts.surrogatePair++;
            }
        }
    }

    var customStop = GC.GetTotalMemory(false);
    Console.WriteLine(customStop);
    Console.WriteLine($"{customStop - customStart} {(customStop - customStart) / Repeat}");
    Console.WriteLine("\t" + customCounts);

    // ---- Variant 2: System.String decoded from the bytes on every iteration ----
    GC.Collect();
    Console.WriteLine("--------");
    var builtinStart = GC.GetTotalMemory(false);
    Console.WriteLine(builtinStart);

    (int total, int ascii, int latin1, int utf16, int surrogatePair) builtinCounts = (0, 0, 0, 0, 0);
    for (var iteration = 0; iteration < Repeat; iteration++)
    {
        var decoded = encoding.GetString(data);
        builtinCounts = (0, 0, 0, 0, 0);
        foreach (var unit in decoded)
        {
            // Count one per code point: the trailing half of a surrogate pair is not counted.
            if (!char.IsLowSurrogate(unit))
            {
                builtinCounts.total++;
            }

            if (unit < 0x80)
            {
                builtinCounts.ascii++;
            }
            else if (unit < 0x100)
            {
                builtinCounts.latin1++;
            }
            else if (!char.IsSurrogate(unit))
            {
                builtinCounts.utf16++;
            }
            else if (char.IsHighSurrogate(unit))
            {
                builtinCounts.surrogatePair++;
            }
        }
    }

    var builtinStop = GC.GetTotalMemory(false);
    Console.WriteLine(builtinStop);
    Console.WriteLine($"{builtinStop - builtinStart} {(builtinStop - builtinStart) / Repeat}");
    Console.WriteLine("\t" + builtinCounts);
}
/// <summary>
/// Lengthens <paramref name="s"/>, encodes it as UTF-8 and UTF-16, and then times
/// five enumeration loops three times over, each inside its own <c>SW.New</c> scope.
/// </summary>
/// <param name="s">Text to measure; the method works on 2^10 concatenated copies of it.</param>
static void Check(string s)
{
    const int LoopCount = 100;

    // Join the string to itself ten times (2^10 copies) to get a longer input for measuring.
    var remainingDoublings = 10;
    while (remainingDoublings > 0)
    {
        s = string.Concat(s, s);
        remainingDoublings--;
    }

    var utf8 = Encoding.UTF8.GetBytes(s);
#if !ArrayImplementation
    var utf16 = Encoding.Unicode.GetBytes(s);
#else
    var utf16 = Copy8To16(Encoding.Unicode.GetBytes(s));
#endif

    GC.Collect();

    for (var trial = 0; trial < 3; trial++)
    {
        Console.WriteLine("---- " + trial + " ----");

        // foreach over each string type, discarding every element.
        using (SW.New("corefxlab code point: "))
        {
            for (var k = 0; k < LoopCount; k++)
            {
                foreach (var cp in new LabUtf8String(utf8))
                {
                }
            }
        }

        using (SW.New(" utf-8 code point: "))
        {
            for (var k = 0; k < LoopCount; k++)
            {
                foreach (var cp in new Utf8String(utf8))
                {
                }
            }
        }

        using (SW.New(" utf-16 code point: "))
        {
            for (var k = 0; k < LoopCount; k++)
            {
                foreach (var cp in new Utf16String(utf16))
                {
                }
            }
        }

        // Walk Indexes and read each element through the indexer.
        using (SW.New(" utf-8 index : "))
        {
            for (var k = 0; k < LoopCount; k++)
            {
                var wrapper = new Utf8String(utf8);
                foreach (var idx in wrapper.Indexes)
                {
                    var item = wrapper[idx];
                }
            }
        }

        using (SW.New(" utf-16 index : "))
        {
            for (var k = 0; k < LoopCount; k++)
            {
                var wrapper = new Utf16String(utf16);
                foreach (var idx in wrapper.Indexes)
                {
                    var item = wrapper[idx];
                }
            }
        }
    }
}