/// <summary>
/// Builds a buffer holding the same 50-byte sequence twice, then checks that the two halves
/// are treated as equivalent by <c>TestHashesSameForEquivalentString</c>, both when the
/// <see cref="Utf8String"/> wraps the byte array and when it wraps a span over the pinned array.
/// </summary>
public void HashesSameForTheSameSubstrings()
{
    const int halfLength = 50;
    const int totalLength = halfLength * 2;

    // Both halves of the buffer receive the same byte sequence.
    byte[] buffer = new byte[totalLength];
    for (int offset = 0; offset < halfLength; offset++)
    {
        // Values start at 0x20 (the space character) so the content is predictable.
        byte value = unchecked((byte)(0x20 + offset));
        buffer[offset] = value;
        buffer[halfLength + offset] = value;
    }

    // Array-backed string and its two halves.
    Utf8String arrayBacked = new Utf8String(buffer);
    Utf8String firstHalfOfArray = arrayBacked.Substring(0, halfLength);
    Utf8String secondHalfOfArray = arrayBacked.Substring(halfLength, halfLength);

    unsafe
    {
        fixed (byte* pinned = buffer)
        {
            // Span-backed string over the very same memory, and its two halves.
            Utf8String spanBacked = new Utf8String(new Span<byte>(pinned, totalLength));
            Utf8String firstHalfOfSpan = spanBacked.Substring(0, halfLength);
            Utf8String secondHalfOfSpan = spanBacked.Substring(halfLength, halfLength);

            TestHashesSameForEquivalentString(firstHalfOfArray, secondHalfOfArray);
            TestHashesSameForEquivalentString(firstHalfOfSpan, secondHalfOfSpan);
            TestHashesSameForEquivalentString(firstHalfOfSpan, secondHalfOfArray);
        }
    }
}
/// <summary>
/// Splits the first header line off <paramref name="headerString"/> into a name/value pair.
/// </summary>
/// <param name="headerString">Text that starts with a header line containing ':' and '\r'.</param>
/// <param name="header">Receives the pair built from the text before ':' and the text up to '\r'.</param>
/// <returns>What is left of <paramref name="headerString"/> after the line has been consumed.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="headerString"/> contains no ':' separator, or the text after it contains no '\r'.
/// </exception>
private static Utf8String ParseHeaderLine(Utf8String headerString, out Utf8StringPair header)
{
    Utf8String headerName;
    Utf8String headerValue;

    //TODO: this will be simplified once we have TrySubstringTo/From accepting strings
    if (!headerString.TrySubstringTo((byte)':', out headerName))
    {
        throw new ArgumentException("Header line does not contain a ':' separator.", nameof(headerString));
    }

    // NOTE(review): presumably TrySubstringFrom yields the text starting AT the matched byte,
    // which is why one extra byte is dropped afterwards - confirm against Utf8String.
    headerString.TrySubstringFrom((byte)':', out headerString);
    if (headerString.Length > 0)
    {
        headerString = headerString.Substring(1);
    }

    if (!headerString.TrySubstringTo((byte)'\r', out headerValue))
    {
        throw new ArgumentException("Header line is not terminated by '\\r'.", nameof(headerString));
    }

    // NOTE(review): the result of this call is deliberately left unchecked, as before; what
    // headerString holds when no '\n' is present depends on TrySubstringFrom - confirm.
    headerString.TrySubstringFrom((byte)'\n', out headerString);
    if (headerString.Length > 0)
    {
        headerString = headerString.Substring(1);
    }

    header = new Utf8StringPair(headerName, headerValue);
    return headerString;
}
/// <summary>
/// Times repeated <c>Substring(1, Length - 2)</c> calls on a <see cref="Utf8String"/> that wraps
/// a span over pinned bytes, for four randomly generated inputs, and reports each timing
/// through <c>PrintTime</c>.
/// </summary>
public unsafe void SubstringTrimOneCharacterOnEachSideConstructFromSpan()
{
    TestCase[] scenarios =
    {
        new TestCase(GetRandomString(5, 32, 126), "Short ASCII string", 50000000),
        new TestCase(GetRandomString(5, 32, 0xD7FF), "Short string", 50000000),
        new TestCase(GetRandomString(50000, 32, 126), "Long ASCII string", 50000000),
        new TestCase(GetRandomString(50000, 32, 0xD7FF), "Long string", 50000000)
    };

    foreach (TestCase scenario in scenarios)
    {
        string text = scenario.String;
        Utf8String source = new Utf8String(text);

        // Pin a copy of the encoded bytes so a span can be created over the raw pointer.
        fixed (byte* pinned = source.CopyBytes())
        {
            Utf8String utf8s = new Utf8String(new Span<byte>(pinned, source.Length));

            int remaining = scenario.Iterations;
            _timer.Restart();
            for (; remaining != 0; remaining--)
            {
                // The value is discarded on purpose; only the cost of the call is of interest.
                Utf8String trimmed = utf8s.Substring(1, utf8s.Length - 2);
            }
            PrintTime(scenario);
        }
    }
}
// The collection that belongs to the material path being loaded has to be registered
// before the file itself is loaded.
// Returns false (with ret = 0) when the descriptor's resource is not of type ResourceType.Mtrl;
// otherwise loads the resource, stores the load result in ret and returns true.
private bool MtrlLoadHandler(Utf8String split, Utf8String path, ResourceManager* resourceManager, SeFileDescriptor* fileDescriptor,
    int priority, bool isSync, out byte ret)
{
    ret = 0;
    var isMaterial = fileDescriptor->ResourceHandle->FileType == ResourceType.Mtrl;
    if (!isMaterial)
    {
        return false;
    }

    // The collection name is everything before the last '_' of split, or all of split
    // when it contains no underscore.
    string collectionName;
    var separatorIndex = split.LastIndexOf((byte)'_');
    if (separatorIndex == -1)
    {
        collectionName = split.ToString();
    }
    else
    {
        collectionName = split.Substring(0, separatorIndex).ToString();
    }

    if (Penumbra.CollectionManager.ByName(collectionName, out var collection))
    {
#if DEBUG
        PluginLog.Verbose("Using MtrlLoadHandler with collection {$Split:l} for path {$Path:l}.", collectionName, path);
#endif
        SetCollection(path, collection);
    }
#if DEBUG
    else
    {
        PluginLog.Verbose("Using MtrlLoadHandler with no collection for path {$Path:l}.", path);
    }
#endif

    // The load is always issued with isSync = true, regardless of the value passed in.
    // The reason is not well understood: the handler was invoked with true on the author's
    // client and with false on other clients, and the latter caused problems.
    ret = Penumbra.ResourceLoader.DefaultLoadResource(path, resourceManager, fileDescriptor, priority, true);

    // The path-to-collection entry is no longer needed once the load call has returned.
    PathCollections.TryRemove(path, out _);
    return true;
}
/// <summary>
/// Checks that two strings expected to be equivalent have the same length, validate as
/// equivalent via <c>TestHelper.Validate</c>, and keep doing so for their trailing substrings.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string, expected to be equivalent to <paramref name="a"/>.</param>
private void TestHashesSameForEquivalentString(Utf8String a, Utf8String b)
{
    // Sanity checks on the full strings.
    Assert.Equal(a.Length, b.Length);
    TestHelper.Validate(a, b);

    int length = a.Length;
    for (int cut = 0; cut < length; cut++)
    {
        // Everything from position 'cut' to the end: must validate and hash identically.
        Utf8String fromCutA = a.Substring(cut, a.Length - cut);
        Utf8String fromCutB = b.Substring(cut, b.Length - cut);
        TestHelper.Validate(fromCutA, fromCutB);
        Assert.Equal(fromCutA.GetHashCode(), fromCutB.GetHashCode());

        // The last 'cut' code units: validated only, hash codes are not compared here.
        Utf8String lastCutA = a.Substring(a.Length - cut, cut);
        Utf8String lastCutB = b.Substring(b.Length - cut, cut);
        TestHelper.Validate(lastCutA, lastCutB);
    }
}
/// <summary>
/// Lazily splits <paramref name="text"/> at each space byte (0x20) and yields every piece after
/// calling <c>Trim()</c> on it. The final piece (everything after the last space) is always yielded.
/// </summary>
/// <param name="text">The text to split.</param>
/// <returns>The trimmed pieces, in order of appearance.</returns>
private static IEnumerable<Utf8String> SplitIntoWords(Utf8String text)
{
    int wordStart = 0;

    for (;;)
    {
        // View of the not-yet-consumed bytes, created without copying. It is only used for
        // the search below and is never kept alive across a yield.
        var unread = MemoryMarshal.CreateReadOnlySpan(
            ref Unsafe.Add(ref Unsafe.AsRef<byte>(in text.GetPinnableReference()), wordStart),
            text.Length - wordStart);

        int spaceOffset = unread.IndexOf((byte)' ');
        if (spaceOffset >= 0)
        {
            yield return text.Substring(wordStart, spaceOffset).Trim();

            // Continue right after the space that was just found.
            wordStart += spaceOffset + 1;
            continue;
        }

        // No further space: the rest of the text is the last piece.
        yield return text.Substring(wordStart).Trim();
        yield break;
    }
}
/// <summary>
/// Reads a double-quoted string value from <c>_str</c> at the current <c>_index</c> and
/// advances <c>_index</c> past it.
/// </summary>
/// <returns>The text between the quotes, without the closing quote.</returns>
private Utf8String ReadStringValue()
{
    // Step over the current position (presumably the opening quote - confirm against callers).
    _index++;

    var cursor = _index;
    while (true)
    {
        // Advance to the next '"' ...
        for (; (byte)_str[cursor] != '"'; cursor++)
        {
        }

        // ... and step past it.
        cursor++;

        // The helper is handed the index just before that quote; while it reports an odd
        // number of backslashes the scan continues (presumably the quote was escaped).
        if (!AreNumOfBackSlashesAtEndOfStringOdd(cursor - 2))
        {
            break;
        }
    }

    // 'consumed' includes the closing quote, the returned substring does not.
    var consumed = cursor - _index;
    var value = _str.Substring(_index, consumed - 1);

    _index += consumed;
    SkipEmpty();

    return value;
}
/// <summary>
/// Enumerates the code points of <paramref name="s"/> and takes a few substrings of it
/// <paramref name="n"/> times, then asserts that <see cref="GC.GetTotalMemory(bool)"/>
/// reports the same value as before the loop.
/// </summary>
/// <param name="s">The string to exercise.</param>
/// <param name="n">Number of repetitions.</param>
private static void NoAllocationWithForeachInternal(Utf8String s, int n)
{
    var memoryBefore = GC.GetTotalMemory(false);

    for (int round = 0; round < n; round++)
    {
        foreach (var codePoint in s.CodePoints)
        {
            // Intentionally empty: only the enumeration itself is exercised.
            ;
        }

        // Substrings are taken only from strings longer than three code units.
        if (s.Length <= 3)
        {
            continue;
        }

        var oneUnit = s.Substring(1, 1);
        var fromSecond = s.Substring(2);
        var fromThird = s.Substring(3);
    }

    var memoryAfter = GC.GetTotalMemory(false);
    Assert.Equal(memoryBefore, memoryAfter);
}
/// <summary>
/// Measures <c>Substring(1, Length - 2)</c> on a <see cref="Utf8String"/> that was constructed
/// from a byte array copy of a randomly generated string.
/// </summary>
/// <param name="length">Forwarded to <c>GetRandomString</c>.</param>
/// <param name="minCodePoint">Forwarded to <c>GetRandomString</c>.</param>
/// <param name="maxCodePoint">Forwarded to <c>GetRandomString</c>.</param>
/// <param name="description">Not read by the method body.</param>
/// <param name="useInnerLoop">
/// When true, the call is repeated <c>Benchmark.InnerIterationCount</c> times per measurement;
/// otherwise once.
/// </param>
public void SubstringTrimOneCharacterOnEachSideConstructFromByteArray(int length, int minCodePoint, int maxCodePoint, string description, bool useInnerLoop = false)
{
    string text = GetRandomString(length, minCodePoint, maxCodePoint);
    Utf8String encoded = new Utf8String(text);

    // Rebuild the string on top of a plain byte array copy of its content.
    Utf8String utf8s = new Utf8String(encoded.CopyBytes());

    foreach (var iteration in Benchmark.Iterations)
    {
        using (iteration.StartMeasurement())
        {
            int pass = 0;
            while (pass < (useInnerLoop ? Benchmark.InnerIterationCount : 1))
            {
                // The value is discarded on purpose; only the cost of the call is measured.
                Utf8String trimmed = utf8s.Substring(1, utf8s.Length - 2);
                pass++;
            }
        }
    }
}
/// <summary>
/// Asserts that two strings expected to be equivalent have the same length, compare equal,
/// and keep comparing equal for their trailing substrings.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string, expected to be equal to <paramref name="a"/>.</param>
private void TestHashesSameForEquivalentString(Utf8String a, Utf8String b)
{
    // Sanity checks on the full strings.
    Assert.Equal(a.Length, b.Length);
    Assert.Equal(a, b);

    int length = a.Length;
    for (int cut = 0; cut < length; cut++)
    {
        // Everything from position 'cut' to the end: must be equal and hash identically.
        Utf8String fromCutA = a.Substring(cut, a.Length - cut);
        Utf8String fromCutB = b.Substring(cut, b.Length - cut);
        Assert.Equal(fromCutA, fromCutB);
        Assert.Equal(fromCutA.GetHashCode(), fromCutB.GetHashCode());

        // The last 'cut' code units: compared for equality only, hash codes are not compared.
        Utf8String lastCutA = a.Substring(a.Length - cut, cut);
        Utf8String lastCutB = b.Substring(b.Length - cut, cut);
        Assert.Equal(lastCutA, lastCutB);
    }
}
/// <summary>
/// Times repeated <c>Substring(1, Length - 2)</c> calls on a <see cref="Utf8String"/> that wraps
/// a <c>ByteSpan</c> over pinned bytes, once for every entry returned by
/// <c>StringsWithDescription()</c>, and reports each timing through <c>PrintTime</c>.
/// </summary>
public unsafe void SubstringTrimOneCharacterOnEachSideConstructFromSpan()
{
    foreach (StringWithDescription scenario in StringsWithDescription())
    {
        string text = scenario.String;
        Utf8String source = new Utf8String(text);

        // Pin a copy of the encoded bytes so a span can be created over the raw pointer.
        fixed (byte* pinned = source.CopyBytes())
        {
            Utf8String utf8s = new Utf8String(new ByteSpan(pinned, source.Length));

            int remaining = scenario.Iterations;
            _timer.Restart();
            for (; remaining != 0; remaining--)
            {
                // The value is discarded on purpose; only the cost of the call is of interest.
                Utf8String trimmed = utf8s.Substring(1, utf8s.Length - 2);
            }
            PrintTime(scenario);
        }
    }
}
/// <summary>
/// Benchmarks trimming one code unit from each side (<c>Substring(1, Length - 2)</c>) of a
/// <see cref="Utf8String"/> built from a span over pinned memory. Four randomly generated
/// inputs are timed, and each result is handed to <c>PrintTime</c>.
/// </summary>
public unsafe void SubstringTrimOneCharacterOnEachSideConstructFromSpan()
{
    TestCase[] inputs =
    {
        new TestCase(GetRandomString(5, 32, 126), "Short ASCII string", 50000000),
        new TestCase(GetRandomString(5, 32, 0xD7FF), "Short string", 50000000),
        new TestCase(GetRandomString(50000, 32, 126), "Long ASCII string", 50000000),
        new TestCase(GetRandomString(50000, 32, 0xD7FF), "Long string", 50000000)
    };

    foreach (TestCase input in inputs)
    {
        string raw = input.String;
        Utf8String original = new Utf8String(raw);

        // The span-backed string is built over a pinned copy of the encoded bytes.
        fixed (byte* pinnedCopy = original.CopyBytes())
        {
            Utf8String utf8s = new Utf8String(new Span<byte>(pinnedCopy, original.Length));

            int left = input.Iterations;
            _timer.Restart();
            for (; left != 0; left--)
            {
                // Only the call is timed; its result is not used.
                Utf8String inner = utf8s.Substring(1, utf8s.Length - 2);
            }
            PrintTime(input);
        }
    }
}
/// <summary>
/// Writes <paramref name="value"/> to <paramref name="writer"/> as a double-quoted string literal.
/// Quote, backslash and the bytes 0x00-0x1F are escaped; all other bytes are copied through in runs.
/// </summary>
/// <param name="writer">Destination of the literal.</param>
/// <param name="value">UTF-8 text to write.</param>
public static void WriteStringLiteralUtf8(this IWriter writer, Utf8String value)
{
    writer.Write((byte)'"');

    // [runStart, pos) is the run of bytes that need no escaping and have not been copied yet.
    var runStart = 0;
    var pos = 0;
    for (; pos < value.Length; pos++)
    {
        var unit = value[pos].Value;

        // escapeKind: 0 => "\u000" + digit, 1 => "\u001" + digit, 2 => '\' + character.
        int escapeKind;
        byte escapeByte;

        if (unit == 0x22 || unit == 0x5C)
        {
            // '"' and '\' are escaped as themselves.
            escapeKind = 2;
            escapeByte = unit;
        }
        else if (unit == 0x08)
        {
            escapeKind = 2;
            escapeByte = (byte)'b';
        }
        else if (unit == 0x0C)
        {
            escapeKind = 2;
            escapeByte = (byte)'f';
        }
        else if (unit == 0x0A)
        {
            escapeKind = 2;
            escapeByte = (byte)'n';
        }
        else if (unit == 0x0D)
        {
            escapeKind = 2;
            escapeByte = (byte)'r';
        }
        else if (unit == 0x09)
        {
            escapeKind = 2;
            escapeByte = (byte)'t';
        }
        else if (unit <= 0x09)
        {
            // 0x00-0x09 => hex digit '0'-'9'.
            escapeKind = 0;
            escapeByte = (byte)(unit + 0x30);
        }
        else if (unit <= 0x0F)
        {
            // 0x0A-0x0F => hex digit 'A'-'F'.
            escapeKind = 0;
            escapeByte = (byte)(unit + 0x37);
        }
        else if (unit <= 0x19)
        {
            // 0x10-0x19 => hex digit '0'-'9'.
            escapeKind = 1;
            escapeByte = (byte)(unit + 0x20);
        }
        else if (unit <= 0x1F)
        {
            // 0x1A-0x1F => hex digit 'A'-'F'.
            escapeKind = 1;
            escapeByte = (byte)(unit + 0x27);
        }
        else
        {
            // Nothing to escape: the byte stays part of the pending run.
            continue;
        }

        // Copy what precedes the escaped byte, then emit the escape sequence.
        CopyUnescapedRun(writer, value, runStart, pos);

        if (escapeKind == 0)
        {
            // \u000x
            writer.Write(u000Utf8);
            writer.Write(escapeByte);
        }
        else if (escapeKind == 1)
        {
            // \u001x
            writer.Write(u001Utf8);
            writer.Write(escapeByte);
        }
        else if (escapeKind == 2)
        {
            // \x
            writer.Write((byte)'\\');
            writer.Write(escapeByte);
        }
        else
        {
            throw new Exception("unreachable");
        }

        runStart = pos + 1;
    }

    // Copy whatever follows the last escaped byte.
    CopyUnescapedRun(writer, value, runStart, pos);

    writer.Write((byte)'"');
}

/// <summary>
/// Copies the bytes <c>[from, to)</c> of <paramref name="value"/> into the writer's free buffer
/// and commits them. Does nothing when the range is empty.
/// </summary>
private static void CopyUnescapedRun(IWriter writer, Utf8String value, int from, int to)
{
    if (from >= to)
    {
        return;
    }

    var run = value.Substring(from, to - from);
    run.CopyTo(writer.GetFreeBuffer(run.Length).ToSpan());
    writer.CommitBytes(run.Length);
}
/// <summary>
/// Counts how many consecutive backslashes <paramref name="str"/> ends with.
/// </summary>
/// <param name="str">The string to inspect.</param>
/// <returns>The number of trailing backslashes; 0 when the string does not end with one.</returns>
private static int GetNumOfBackSlashesAtEndOfString(Utf8String str)
{
    // Built once, outside the loop, so the literal is not re-created for every trailing backslash.
    var backslash = new Utf8String("\\");

    var numOfBackSlashes = 0;
    while (str.EndsWith(backslash))
    {
        // Drop the backslash that was just counted and look at the new end.
        str = str.Substring(0, str.Length - 1);
        numOfBackSlashes++;
    }

    return numOfBackSlashes;
}
/// <summary>
/// Fills a 100-byte buffer with the same 50-byte sequence twice and verifies, through
/// <c>TestHashesSameForEquivalentString</c>, that the two halves are equivalent for an
/// array-backed <see cref="Utf8String"/>, for a span-backed one, and across the two.
/// </summary>
public void HashesSameForTheSameSubstrings()
{
    const int chunk = 50;

    // Two copies of the same content, back to back.
    byte[] data = new byte[chunk * 2];
    for (int k = 0; k < chunk; k++)
    {
        // Content starts at 0x20 (space) to keep the values predictable.
        byte current = unchecked((byte)(0x20 + k));
        data[k] = current;
        data[k + chunk] = current;
    }

    Utf8String wholeFromArray = new Utf8String(data);
    Utf8String leftFromArray = wholeFromArray.Substring(0, chunk);
    Utf8String rightFromArray = wholeFromArray.Substring(chunk, chunk);

    unsafe
    {
        fixed (byte* dataPointer = data)
        {
            // Same memory, this time reached through a span over the pinned pointer.
            Utf8String wholeFromSpan = new Utf8String(new Span<byte>(dataPointer, chunk * 2));
            Utf8String leftFromSpan = wholeFromSpan.Substring(0, chunk);
            Utf8String rightFromSpan = wholeFromSpan.Substring(chunk, chunk);

            TestHashesSameForEquivalentString(leftFromArray, rightFromArray);
            TestHashesSameForEquivalentString(leftFromSpan, rightFromSpan);
            TestHashesSameForEquivalentString(leftFromSpan, rightFromArray);
        }
    }
}
[InlineData(" !", false, 0, false, 0)] // invalid character test w/ char < '0'
public unsafe void ParseBool(string text, bool expectSuccess, int index, bool expectedValue, int expectedBytesConsumed)
{
    // Runs the same parse through every PrimitiveParser.TryParseBoolean input flavor
    // (string, Utf8String, byte[], ReadOnlySpan<byte>, byte*) and expects the same success flag,
    // parsed value and consumed byte count from each of them.
    bool result;
    bool parsedValue;
    int bytesConsumed;

    var utf8String = new Utf8String(text);
    byte[] utf8Bytes = utf8String.CopyBytes();
    ReadOnlySpan<byte> utf8BytesSlice = new ReadOnlySpan<byte>(utf8Bytes);

    // System.String
    result = PrimitiveParser.TryParseBoolean(text, index, 'N', out parsedValue, out bytesConsumed);
    Assert.Equal(expectSuccess, result);
    Assert.Equal(expectedValue, parsedValue);
    Assert.Equal(expectedBytesConsumed, bytesConsumed);

    // Utf8String
    result = PrimitiveParser.TryParseBoolean(utf8String.Substring(index), 'N', out parsedValue, out bytesConsumed);
    Assert.Equal(expectSuccess, result);
    Assert.Equal(expectedValue, parsedValue);
    Assert.Equal(expectedBytesConsumed, bytesConsumed);

    // byte[]
    result = PrimitiveParser.TryParseBoolean(utf8Bytes, index, EncodingData.InvariantUtf8, 'N', out parsedValue, out bytesConsumed);
    Assert.Equal(expectSuccess, result);
    Assert.Equal(expectedValue, parsedValue);
    Assert.Equal(expectedBytesConsumed, bytesConsumed);

    // ReadOnlySpan<byte>: slice the read-only span declared above, so this section really
    // passes a ReadOnlySpan<byte> instead of slicing the array.
    result = PrimitiveParser.TryParseBoolean(utf8BytesSlice.Slice(index), EncodingData.InvariantUtf8, 'N', out parsedValue, out bytesConsumed);
    Assert.Equal(expectSuccess, result);
    Assert.Equal(expectedValue, parsedValue);
    Assert.Equal(expectedBytesConsumed, bytesConsumed);

    // byte*
    fixed (byte* arrayPointer = utf8Bytes)
    {
        result = PrimitiveParser.TryParseBoolean(arrayPointer, index, utf8Bytes.Length, EncodingData.InvariantUtf8, 'N', out parsedValue, out bytesConsumed);
        Assert.Equal(expectSuccess, result);
        Assert.Equal(expectedValue, parsedValue);
        Assert.Equal(expectedBytesConsumed, bytesConsumed);
    }
}
/// <summary>
/// Console demo contrasting <see cref="string"/> (copies its data) with <c>Utf8String</c>
/// (shares the underlying buffer), using the same JSON-like text in UTF-8 and UTF-16 form.
/// </summary>
public static void Run()
{
    byte[] utf8RawData =
    {
        0x7B, 0x20, 0x22, 0x6B, 0x65, 0x79, 0x22, 0x3A, 0x20, 0x22,
        0x61, 0xE3, 0x81, 0x82, 0xF0, 0x9F, 0x98, 0x80, 0x22, 0x20, 0x7D
    };
    char[] utf16RawData =
    {
        '{', ' ', '"', 'k', 'e', 'y', '"', ':', ' ', '"',
        'a', 'あ', (char)0xD83D, (char)0xDE00, '"', ' ', '}'
    };

    // string
    {
        // Converting UTF-8 to UTF-16 requires a heap allocation.
        string decoded = System.Text.Encoding.UTF8.GetString(utf8RawData);

        // Even when given a char[], string makes an internal copy, so this allocates too.
        string fromChars = new string(utf16RawData);

        // string.Substring copies as well.
        string slice = decoded.Substring(10, 4);
        Console.WriteLine(slice);
    }

    // Utf8String
    {
        // Implementation that does not allocate on the heap.
        var utf8 = new Utf8String(utf8RawData);

        // Characters cannot be fetched by index; s[0] is byte-wise access.
        // Use CodePoints to get at the code points.
        // The foreach is expanded entirely in terms of structs, so no heap allocation is needed.
        foreach (var codePoint in utf8.CodePoints)
        {
            Console.WriteLine(codePoint);
        }

        // Substring is implemented without copying as well.
        var slice = utf8.Substring(10, 8);
        foreach (var codePoint in slice.CodePoints)
        {
            Console.WriteLine(codePoint);
        }
    }

    // string
    {
        // The data is copied internally, so ...
        string first = new string(utf16RawData);
        string second = new string(utf16RawData);

        // ... rewriting the original data ...
        utf16RawData[0] = '[';
        utf16RawData[16] = ']';

        // ... has no effect.
        Console.WriteLine(first); // { "key": "aあ😀" }
        Console.WriteLine(second); // { "key": "aあ😀" }
    }

    // Utf8String
    {
        // The data is shared, so ...
        var first = new Utf8String(utf8RawData);
        var second = new Utf8String(utf8RawData);

        // ... rewriting the original data
        // (bytes 98, 227, 129, 132, 240, 159, 144, 136 written at offsets 10..17) ...
        byte[] replacement = { 98, 227, 129, 132, 240, 159, 144, 136 };
        for (int k = 0; k < replacement.Length; k++)
        {
            utf8RawData[10 + k] = replacement[k];
        }

        // ... does have an effect.
        Console.WriteLine(first); // { "key": "bい🐈" }
        Console.WriteLine(second); // { "key": "bい🐈" }
        Console.WriteLine(first.Substring(10, 8)); // bい🐈
    }
}