/// <summary>
/// Encodes <paramref name="field"/> as UTF-8 into memory obtained from this context, appends the
/// escape positions right after the encoded bytes, and wraps the result in a <see cref="LazyStringValue"/>.
/// </summary>
/// <param name="field">The character segment (buffer, offset and length) to encode.</param>
/// <param name="longLived">
/// When <c>true</c>, memory is taken from <c>GetLongLivedMemory</c> and the value is constructed directly;
/// when <c>false</c>, memory is taken from <c>GetMemory</c> and the value comes from <c>AllocateStringValue</c>.
/// </param>
/// <returns>
/// The lazy string, with <c>AllocatedMemoryData</c> set to the memory that backs it and
/// <c>EscapePositions</c> populated when at least one escape position was found.
/// </returns>
private unsafe LazyStringValue GetLazyString(StringSegment field, bool longLived)
{
    var parserState = new JsonParserState();

    // Worst-case sizing: maximum UTF-8 expansion of the text plus the maximum space
    // the escape positions may need when written after it.
    var utf8Capacity = Encodings.Utf8.GetMaxByteCount(field.Length);
    int escapeCapacity = JsonParserState.FindEscapePositionsMaxSize(field);
    int totalBytes = utf8Capacity + escapeCapacity;

    var allocation = longLived
        ? GetLongLivedMemory(totalBytes)
        : GetMemory(totalBytes);

    fixed (char* pSource = field.Buffer)
    {
        var destination = allocation.Address;
        var encodedLength = Encodings.Utf8.GetBytes(pSource + field.Offset, field.Length, destination, allocation.SizeInBytes);

        parserState.FindEscapePositionsIn(destination, encodedLength, escapeCapacity);
        // Escape positions are stored immediately after the encoded text.
        parserState.WriteEscapePositionsTo(destination + encodedLength);

        LazyStringValue lazyString;
        if (longLived)
        {
            lazyString = new LazyStringValue(field, destination, encodedLength, this);
        }
        else
        {
            lazyString = AllocateStringValue(field, destination, encodedLength);
        }

        lazyString.AllocatedMemoryData = allocation;

        if (parserState.EscapePositions.Count > 0)
        {
            lazyString.EscapePositions = parserState.EscapePositions.ToArray();
        }

        return lazyString;
    }
}
/// <summary>
/// Writes a managed string value: encodes it as UTF-8 into temporary context memory, collects its
/// escape positions into the reusable <c>_intBuffer</c>, and delegates to the pointer-based
/// <c>WriteValue</c> overload.
/// </summary>
/// <param name="str">The string to write.</param>
/// <param name="token">Receives the token produced by the pointer-based overload.</param>
/// <param name="mode">Usage mode forwarded to the pointer-based overload.</param>
/// <returns>The value returned by the pointer-based <c>WriteValue</c> overload.</returns>
public unsafe int WriteValue(string str, out BlittableJsonToken token, UsageMode mode = UsageMode.None)
{
    // The escape-position list is created lazily and then reused across calls.
    if (_intBuffer == null)
    {
        _intBuffer = new FastList<int>();
    }

    var maxEscapeBytes = JsonParserState.FindEscapePositionsMaxSize(str);
    int capacity = Encodings.Utf8.GetMaxByteCount(str.Length) + maxEscapeBytes;

    AllocatedMemoryData scratch = null;
    try
    {
        scratch = _context.GetMemory(capacity);

        fixed (char* pSource = str)
        {
            var encodedLength = Encodings.Utf8.GetBytes(pSource, str.Length, scratch.Address, capacity);
            JsonParserState.FindEscapePositionsIn(_intBuffer, scratch.Address, encodedLength, maxEscapeBytes);

            return WriteValue(scratch.Address, encodedLength, _intBuffer, out token, mode, null);
        }
    }
    finally
    {
        // The scratch memory is only needed for the duration of this call.
        if (scratch != null)
        {
            _context.ReturnMemory(scratch);
        }
    }
}
/// <summary>
/// Encodes <paramref name="str"/> as UTF-8 into the reusable state buffer, points
/// <c>_state.StringBuffer</c>/<c>_state.StringSize</c> at it, and writes the escape positions
/// immediately after the encoded bytes.
/// </summary>
/// <param name="str">The string to place into the parser state.</param>
private void SetStringBuffer(string str)
{
    // Use the maximum possible size instead of GetByteCount: profiling showed GetByteCount taking
    // 2% of runtime. The buffer may end up a bit larger than needed, but it is reused, which is
    // cheaper than computing the exact size.
    int maxUtf8Bytes = Encodings.Utf8.GetMaxByteCount(str.Length);
    int maxEscapeBytes = JsonParserState.FindEscapePositionsMaxSize(str, out _);
    var requiredBytes = maxUtf8Bytes + maxEscapeBytes;

    // No buffer yet, or the current one is too small: hand the old one back and get a bigger one.
    bool hasBuffer = _currentStateBuffer != null;
    if (hasBuffer == false || _currentStateBuffer.SizeInBytes < requiredBytes)
    {
        if (hasBuffer)
        {
            _ctx.ReturnMemory(_currentStateBuffer);
        }

        _currentStateBuffer = _ctx.GetMemory(requiredBytes);
        Debug.Assert(_currentStateBuffer != null && _currentStateBuffer.Address != null);
    }

    _state.StringBuffer = _currentStateBuffer.Address;

    fixed (char* pSource = str)
    {
        _state.StringSize = Encodings.Utf8.GetBytes(pSource, str.Length, _state.StringBuffer, maxUtf8Bytes);
        _state.CompressedSize = null; // don't even try

        // StringSize is passed by reference, so the call may adjust it.
        _state.FindEscapePositionsIn(_state.StringBuffer, ref _state.StringSize, maxEscapeBytes);

        var escapeTarget = _state.StringBuffer + _state.StringSize;
        _state.WriteEscapePositionsTo(escapeTarget);
    }
}
/// <summary>
/// Builds a lowercased byte slice for a document id, optionally followed by a separator byte.
/// </summary>
/// <remarks>
/// ASCII ids are lowercased byte by byte in place. As soon as a character above 127 is seen, the whole
/// id is handed to <c>ReadFromUnicodeKey</c>, which also receives the separator.
/// </remarks>
/// <param name="context">Transaction context whose allocator provides the buffer.</param>
/// <param name="id">The document id.</param>
/// <param name="idSlice">Receives the slice wrapping the allocated buffer.</param>
/// <param name="separator">Optional byte appended after the id bytes.</param>
/// <returns>The allocation scope returned by the allocator for the slice's buffer.</returns>
public static ByteStringContext<ByteStringMemoryCache>.InternalScope GetSliceFromId<TTransaction>(
    TransactionOperationContext<TTransaction> context, string id, out Slice idSlice, byte? separator = null)
    where TTransaction : RavenTransaction
{
    if (_jsonParserState == null)
    {
        _jsonParserState = new JsonParserState();
    }

    _jsonParserState.Reset();

    var length = id.Length;
    var maxStrSize = Encoding.GetMaxByteCount(length);
    var escapePositionsSize = JsonParserState.FindEscapePositionsMaxSize(id, out _);

    if (length > MaxIdSize)
    {
        ThrowDocumentIdTooBig(id);
    }

    // The buffer is sized so that it can also serve the unicode path,
    // which needs room for a char copy of the id.
    var bufferSize = maxStrSize
                     + sizeof(char) * id.Length
                     + escapePositionsSize
                     + (separator.HasValue ? 1 : 0);

    var internalScope = context.Allocator.Allocate(bufferSize, out var buffer);
    idSlice = new Slice(buffer);

    var asciiOnly = true;
    for (var index = 0; index < id.Length; index++)
    {
        var current = id[index];

        if (current > 127) // not ASCII, use slower mode
        {
            length = ReadFromUnicodeKey(id, buffer, maxStrSize, separator);
            asciiOnly = false;
            break;
        }

        if (current >= 'A' && current <= 'Z')
        {
            // Setting bit 0x20 maps an ASCII upper-case letter to its lower-case form.
            buffer.Ptr[index] = (byte)(current | 0x20);
        }
        else
        {
            buffer.Ptr[index] = (byte)current;
        }
    }

    if (asciiOnly)
    {
        // length is passed by reference, so the call may adjust it.
        _jsonParserState.FindEscapePositionsIn(buffer.Ptr, ref length, escapePositionsSize);

        if (separator.HasValue)
        {
            buffer.Ptr[length] = separator.Value;
            length++;
        }
    }

    buffer.Truncate(length);
    return internalScope;
}
/// <summary>
/// Produces, from a single allocation, both the lowercased lookup key and the storage key for an id.
/// </summary>
/// <remarks>
/// ASCII-only ids are handled inline. The first character above 127 disposes the allocation and
/// delegates to <c>UnicodeGetLowerIdAndStorageKey</c>.
/// NOTE(review): no id-length limit is enforced on the ASCII path in this version; confirm whether
/// callers validate the length.
/// </remarks>
/// <param name="context">Transaction context whose allocator provides the buffer.</param>
/// <param name="str">The id to process.</param>
/// <param name="lowerIdSlice">Receives an external slice over the lowercased id bytes.</param>
/// <param name="idSlice">Receives an external slice over the storage key (format described below).</param>
/// <returns>The allocation scope for the buffer both slices point into.</returns>
public static ByteStringContext.InternalScope GetLowerIdSliceAndStorageKey <TTransaction>(
    TransactionOperationContext <TTransaction> context, string str, out Slice lowerIdSlice, out Slice idSlice)
    where TTransaction : RavenTransaction
{
    // Because we need to also store escape positions for the key when we store it
    // we need to store it as a lazy string value.
    // But lazy string value has two lengths, one is the string length, and the other
    // is the actual data size with the escape positions
    // In order to resolve this, we process the key to find escape positions, then store it
    // in the table using the following format:
    //
    // [var int - string len, string bytes, number of escape positions, escape positions]
    //
    // The total length of the string is stored in the actual table (and includes the var int size
    // prefix).
    if (_jsonParserState == null) { _jsonParserState = new JsonParserState(); }
    _jsonParserState.Reset();
    int strLength = str.Length;
    int maxStrSize = Encoding.GetMaxByteCount(strLength);
    // Size of the var-int prefix for the character count; on the ASCII path this is also the byte count.
    int idSize = JsonParserState.VariableSizeIntSize(strLength);
    int escapePositionsSize = JsonParserState.FindEscapePositionsMaxSize(str);
    // Buffer layout: [lower key: maxStrSize][var int: idSize][actual key: maxStrSize][escape positions]
    var scope = context.Allocator.Allocate(maxStrSize // lower key
                                           + idSize // the size of var int for the len of the key
                                           + maxStrSize // actual key
                                           + escapePositionsSize, out ByteString buffer);
    byte *ptr = buffer.Ptr;
    fixed(char *pChars = str)
    {
        for (var i = 0; i < strLength; i++)
        {
            uint ch = pChars[i];
            // PERF: Trick to avoid multiple compare instructions on hot loops.
            // This is the same as (ch >= 65 && ch <= 90)
            // (ch is unsigned, so values below 65 wrap around to a large number and fail the test).
            if (ch - 65 <= 90 - 65) { ptr[i] = (byte)(ch | 0x20); }
            else
            {
                if (ch > 127) // not ASCII, use slower mode
                { goto UnlikelyUnicode; }
                ptr[i] = (byte)ch;
            }
            // Original-case byte goes into the storage key, right after the var-int prefix.
            ptr[i + idSize + maxStrSize] = (byte)ch;
        }
        // Escape positions are computed over the lowercased bytes at the start of the buffer.
        _jsonParserState.FindEscapePositionsIn(ptr, strLength, escapePositionsSize);
    }
    // The storage key starts right after the lower-key region.
    var writePos = ptr + maxStrSize;
    // writePos is passed by ref; presumably it is advanced past the var int that was written.
    JsonParserState.WriteVariableSizeInt(ref writePos, strLength);
    // From here on escapePositionsSize holds the value returned by WriteEscapePositionsTo,
    // not the worst-case estimate.
    escapePositionsSize = _jsonParserState.WriteEscapePositionsTo(writePos + strLength);
    // Total storage key size: var-int prefix + string bytes + escape positions.
    idSize = escapePositionsSize + strLength + idSize;
    Slice.External(context.Allocator, ptr + maxStrSize, idSize, out idSlice);
    Slice.External(context.Allocator, ptr, str.Length, out lowerIdSlice);
    return(scope);
    UnlikelyUnicode:
    // Non-ASCII input: discard this allocation and let the unicode path build both keys.
    scope.Dispose();
    return(UnicodeGetLowerIdAndStorageKey(context, str, out lowerIdSlice, out idSlice, maxStrSize, escapePositionsSize));
}
/// <summary>
/// Round-trips numeric extremes, plain/escaped/non-ASCII strings, a hand-built lazy string value and
/// an embedded document through <c>ManualBlittableJsonDocumentBuilder</c>, then checks every property
/// on the resulting reader.
/// </summary>
public unsafe void ReadDataTypesTest()
{
    using (var ctx = new JsonOperationContext(1024, 1024 * 4, SharedMultipleUseFlag.None))
    {
        // Small document that is embedded into the main one further down.
        BlittableJsonReaderObject embeddedReader;
        using (var embeddedBuilder = new ManualBlittableJsonDocumentBuilder<UnmanagedWriteBuffer>(ctx))
        {
            embeddedBuilder.Reset(BlittableJsonDocumentBuilder.UsageMode.None);
            embeddedBuilder.StartWriteObjectDocument();
            embeddedBuilder.StartWriteObject();

            embeddedBuilder.WritePropertyName("Value");
            embeddedBuilder.WriteValue(1000);

            embeddedBuilder.WriteObjectEnd();
            embeddedBuilder.FinalizeDocument();
            embeddedReader = embeddedBuilder.CreateReader();
        }

        using (var docBuilder = new ManualBlittableJsonDocumentBuilder<UnmanagedWriteBuffer>(ctx))
        {
            const string quotedCool = "\"Cool\"";
            var longEscapedCharsString = string.Join(",", Enumerable.Repeat(quotedCool, 200));
            var longEscapedCharsAndNonAsciiString = string.Join(",", Enumerable.Repeat("\"מגניב\"", 200));

            docBuilder.Reset(BlittableJsonDocumentBuilder.UsageMode.None);
            docBuilder.StartWriteObjectDocument();
            docBuilder.StartWriteObject();

            // Numeric extremes.
            docBuilder.WritePropertyName("FloatMin");
            docBuilder.WriteValue(float.MinValue);
            docBuilder.WritePropertyName("FloatMax");
            docBuilder.WriteValue(float.MaxValue);
            docBuilder.WritePropertyName("UshortMin");
            docBuilder.WriteValue(ushort.MinValue);
            docBuilder.WritePropertyName("UshortMax");
            docBuilder.WriteValue(ushort.MaxValue);
            docBuilder.WritePropertyName("UintMin");
            docBuilder.WriteValue(uint.MinValue);
            docBuilder.WritePropertyName("UintMax");
            docBuilder.WriteValue(uint.MaxValue);
            docBuilder.WritePropertyName("DoubleMin");
            docBuilder.WriteValue(double.MinValue);
            docBuilder.WritePropertyName("DoubleMax");
            docBuilder.WriteValue(double.MaxValue);
            docBuilder.WritePropertyName("LongMin");
            docBuilder.WriteValue(long.MinValue);
            docBuilder.WritePropertyName("LongMax");
            docBuilder.WriteValue(long.MaxValue);

            // Strings: empty, plain, escaped, long escaped, long escaped with non-ASCII.
            docBuilder.WritePropertyName("StringEmpty");
            docBuilder.WriteValue(string.Empty);
            docBuilder.WritePropertyName("StringSimple");
            docBuilder.WriteValue("StringSimple");
            docBuilder.WritePropertyName("StringEscapedChars");
            docBuilder.WriteValue(quotedCool);
            docBuilder.WritePropertyName("StringLongEscapedChars");
            docBuilder.WriteValue(longEscapedCharsString);
            docBuilder.WritePropertyName("StringEscapedCharsAndNonAscii");
            docBuilder.WriteValue(longEscapedCharsAndNonAsciiString);

            // A lazy string value built by hand over pinned UTF-8 bytes, with its escape positions attached.
            var lsvString = "\"fooאbar\"";
            var lsvUtf8 = Encoding.UTF8.GetBytes(lsvString);
            fixed (byte* pUtf8 = lsvUtf8)
            {
                var maxEscapeBytes = JsonParserState.FindEscapePositionsMaxSize(lsvString);
                var lazyValue = ctx.AllocateStringValue(null, pUtf8, lsvUtf8.Length);

                var positions = new FastList<int>();
                var byteLength = lsvUtf8.Length;
                JsonParserState.FindEscapePositionsIn(positions, pUtf8, ref byteLength, maxEscapeBytes);
                lazyValue.EscapePositions = positions.ToArray();

                docBuilder.WritePropertyName("LSVString");
                docBuilder.WriteValue(lazyValue);
            }

            docBuilder.WritePropertyName("Embedded");
            docBuilder.WriteEmbeddedBlittableDocument(embeddedReader);

            docBuilder.WriteObjectEnd();
            docBuilder.FinalizeDocument();

            var reader = docBuilder.CreateReader();
            reader.BlittableValidation();

            var invariant = CultureInfo.InvariantCulture;

            Assert.Equal(17, reader.Count);

            Assert.Equal(float.MinValue, float.Parse(reader["FloatMin"].ToString(), invariant));
            Assert.Equal(float.MaxValue, float.Parse(reader["FloatMax"].ToString(), invariant));
            Assert.Equal(ushort.MinValue, ushort.Parse(reader["UshortMin"].ToString(), invariant));
            Assert.Equal(ushort.MaxValue, ushort.Parse(reader["UshortMax"].ToString(), invariant));
            Assert.Equal(uint.MinValue, uint.Parse(reader["UintMin"].ToString(), invariant));
            Assert.Equal(uint.MaxValue, uint.Parse(reader["UintMax"].ToString(), invariant));
            Assert.Equal(double.MinValue, double.Parse(reader["DoubleMin"].ToString(), invariant));
            Assert.Equal(double.MaxValue, double.Parse(reader["DoubleMax"].ToString(), invariant));
            Assert.Equal(long.MinValue, long.Parse(reader["LongMin"].ToString(), invariant));
            Assert.Equal(long.MaxValue, long.Parse(reader["LongMax"].ToString(), invariant));

            Assert.Equal(string.Empty, reader["StringEmpty"].ToString());
            Assert.Equal("StringSimple", reader["StringSimple"].ToString());
            Assert.Equal(quotedCool, reader["StringEscapedChars"].ToString());
            Assert.Equal(longEscapedCharsString, reader["StringLongEscapedChars"].ToString());
            Assert.Equal(longEscapedCharsAndNonAsciiString, reader["StringEscapedCharsAndNonAscii"].ToString());
            Assert.Equal(lsvString, reader["LSVString"].ToString());

            Assert.Equal(1000, int.Parse((reader["Embedded"] as BlittableJsonReaderObject)["Value"].ToString(), invariant));
        }
    }
}
/// <summary>
/// Slow path for ids containing non-ASCII characters: builds the lowercased lookup key and the
/// storage key using culture-invariant lowercasing and real UTF-8 encoding.
/// </summary>
/// <remarks>
/// The storage key format is
/// <c>[var int - string len, string bytes, number of escape positions, escape positions]</c>.
/// Buffer layout, in order: a char scratch area used for lowercasing (<c>sizeof(char) * strLength</c>),
/// the lowercased id (<c>byteCount</c>), the var-int length prefix reserved at its worst-case size
/// (<c>maxIdLenSize</c>), the id bytes (<c>byteCount</c>) and the escape positions.
/// This method uses <c>_jsonParserState</c> without a null check, so it presumably relies on the caller
/// having initialised it.
/// </remarks>
/// <param name="context">Transaction context whose allocator provides the buffer.</param>
/// <param name="str">The id to process.</param>
/// <param name="lowerIdSlice">Receives an external slice over the lowercased UTF-8 id.</param>
/// <param name="idSlice">Receives an external slice over the storage key.</param>
/// <returns>The allocation scope for the buffer both slices point into.</returns>
private static ByteStringContext.InternalScope UnicodeGetLowerIdAndStorageKey<TTransaction>(
    TransactionOperationContext<TTransaction> context, string str, out Slice lowerIdSlice, out Slice idSlice)
    where TTransaction : RavenTransaction
{
    _jsonParserState.Reset();

    var byteCount = Encoding.GetMaxByteCount(str.Length);
    // Worst-case prefix size; the real encoded length is only known after encoding.
    var maxIdLenSize = JsonParserState.VariableSizeIntSize(byteCount);

    int strLength = str.Length;
    int escapePositionsSize = JsonParserState.FindEscapePositionsMaxSize(str);

    var scope = context.Allocator.Allocate(
        sizeof(char) * strLength // for the lower calls
        + byteCount // lower ID
        + maxIdLenSize // the size of var int for the len of the ID
        + byteCount // actual ID
        + escapePositionsSize, out ByteString buffer);

    fixed (char* pChars = str)
    {
        // Temporary encode at the start of the buffer, only to find the escape positions.
        // This region is overwritten by the lowercasing scratch area below.
        var size = Encoding.GetBytes(pChars, strLength, buffer.Ptr, byteCount);
        _jsonParserState.FindEscapePositionsIn(buffer.Ptr, size, escapePositionsSize);

        var destChars = (char*)buffer.Ptr;
        for (var i = 0; i < strLength; i++)
        {
            destChars[i] = char.ToLowerInvariant(pChars[i]);
        }

        byte* lowerId = buffer.Ptr + strLength * sizeof(char);
        int lowerSize = Encoding.GetBytes(destChars, strLength, lowerId, byteCount);

        if (lowerSize > 512)
        {
            ThrowDocumentIdTooBig(str);
        }

        byte* id = buffer.Ptr + strLength * sizeof(char) + byteCount;
        byte* writePos = id;

        // The id bytes are written after the worst-case prefix; the prefix itself is written afterwards.
        int idSize = Encoding.GetBytes(pChars, strLength, writePos + maxIdLenSize, byteCount);

        var actualIdLenSize = JsonParserState.VariableSizeIntSize(idSize);
        if (actualIdLenSize < maxIdLenSize)
        {
            // The real prefix is shorter than reserved: start the key later so the prefix
            // ends exactly where the id bytes begin.
            var movePtr = maxIdLenSize - actualIdLenSize;
            id += movePtr;
            writePos += movePtr;
        }

        JsonParserState.WriteVariableSizeInt(ref writePos, idSize);
        escapePositionsSize = _jsonParserState.WriteEscapePositionsTo(writePos + idSize);

        // The key starts at the (possibly advanced) id pointer, so its length must count the prefix
        // bytes actually written. Using maxIdLenSize here made the slice run past the escape
        // positions by (maxIdLenSize - actualIdLenSize) bytes.
        idSize += escapePositionsSize + actualIdLenSize;

        Slice.External(context.Allocator, id, idSize, out idSlice);
        Slice.External(context.Allocator, lowerId, lowerSize, out lowerIdSlice);
        return scope;
    }
}
/// <summary>
/// Produces, from a single allocation, both the lowercased lookup key and the storage key for an id.
/// Ids longer than <c>MaxIdSize</c> characters are rejected through <c>ThrowDocumentIdTooBig</c>.
/// </summary>
/// <remarks>
/// ASCII-only ids are handled inline. The first character above 127 disposes the allocation and
/// delegates to <c>UnicodeGetLowerIdAndStorageKey</c>.
/// </remarks>
/// <param name="context">Transaction context whose allocator provides the buffer.</param>
/// <param name="str">The id to process.</param>
/// <param name="lowerIdSlice">Receives an external slice over the lowercased id bytes.</param>
/// <param name="idSlice">Receives an external slice over the storage key (format described below).</param>
/// <returns>The allocation scope for the buffer both slices point into.</returns>
public static ByteStringContext.InternalScope GetLowerIdSliceAndStorageKey <TTransaction>(
    TransactionOperationContext <TTransaction> context, string str, out Slice lowerIdSlice, out Slice idSlice)
    where TTransaction : RavenTransaction
{
    // Because we need to also store escape positions for the key when we store it
    // we need to store it as a lazy string value.
    // But lazy string value has two lengths, one is the string length, and the other
    // is the actual data size with the escape positions
    // In order to resolve this, we process the key to find escape positions, then store it
    // in the table using the following format:
    //
    // [var int - string len, string bytes, number of escape positions, escape positions]
    //
    // The total length of the string is stored in the actual table (and includes the var int size
    // prefix).
    if (_jsonParserState == null) { _jsonParserState = new JsonParserState(); }
    _jsonParserState.Reset();
    int originalStrLength = str.Length;
    int strLength = originalStrLength;
    if (strLength > MaxIdSize) { ThrowDocumentIdTooBig(str); }
    int escapePositionsSize = JsonParserState.FindEscapePositionsMaxSize(str, out var escapedCount);
    /*
     * add the size of all control characters
     * this is to treat the case when we have 2+ control characters in a row
     * GetMaxByteCount returns a smaller size than the actual size with escaped control characters
     * For example: a string with two control characters such as '\0\0' will be converted to '\u0000\u0000' (another example: '\b\b' => '\u0008\u0008')
     * string size = 2, GetMaxByteCount = 9, converted string size = 12, maxStrSize = 19
     */
    var maxStrSize = Encoding.GetMaxByteCount(strLength) + JsonParserState.ControlCharacterItemSize * escapedCount;
    var originalMaxStrSize = maxStrSize;
    // Worst-case var-int prefix size; it is shrunk further down once the final length is known.
    int idSize = JsonParserState.VariableSizeIntSize(maxStrSize);
    // Buffer layout: [lower key: maxStrSize][var int: idSize][actual key: maxStrSize][escape positions]
    var scope = context.Allocator.Allocate(maxStrSize // lower key
                                           + idSize // the size of var int for the len of the key
                                           + maxStrSize // actual key
                                           + escapePositionsSize, out ByteString buffer);
    byte *ptr = buffer.Ptr;
    fixed(char *pChars = str)
    {
        for (var i = 0; i < strLength; i++)
        {
            uint ch = pChars[i];
            // PERF: Trick to avoid multiple compare instructions on hot loops.
            // This is the same as (ch >= 65 && ch <= 90)
            // (ch is unsigned, so values below 65 wrap around to a large number and fail the test).
            if (ch - 65 <= 90 - 65) { ptr[i] = (byte)(ch | 0x20); }
            else
            {
                if (ch > 127) // not ASCII, use slower mode
                { goto UnlikelyUnicode; }
                ptr[i] = (byte)ch;
            }
            // Original-case byte goes into the storage key, after the worst-case var-int prefix.
            ptr[i + idSize + maxStrSize] = (byte)ch;
        }
        // strLength is passed by reference, so this call may change it.
        _jsonParserState.FindEscapePositionsIn(ptr, ref strLength, escapePositionsSize);
        if (strLength != originalStrLength)
        {
            // The lowercased key changed length, so run the same processing over the original-case
            // copy as well; the two lengths are expected to match (checked in DEBUG builds).
            var anotherStrLength = originalStrLength;
            _jsonParserState.FindEscapePositionsIn(ptr + idSize + maxStrSize, ref anotherStrLength, escapePositionsSize);
#if DEBUG
            if (strLength != anotherStrLength) { throw new InvalidOperationException($"String length mismatch between Id ({str}) and it's lowercased counterpart after finding escape positions. Original: {anotherStrLength}. Lowercased: {strLength}"); }
#endif
        }
    }
    // The storage key starts right after the lower-key region.
    var writePos = ptr + maxStrSize;
    Debug.Assert(strLength <= originalMaxStrSize, $"Calculated {nameof(originalMaxStrSize)} value {originalMaxStrSize}, was smaller than actually {nameof(strLength)} value {strLength}");
    // in case there were no control characters the idSize could be smaller
    // Skip the unused leading prefix bytes so the var int ends exactly where the key bytes begin.
    var sizeDifference = idSize - JsonParserState.VariableSizeIntSize(strLength);
    writePos += sizeDifference;
    idSize -= sizeDifference;
    // writePos is passed by ref; presumably it is advanced past the var int that was written.
    JsonParserState.WriteVariableSizeInt(ref writePos, strLength);
    // From here on escapePositionsSize holds the value returned by WriteEscapePositionsTo,
    // not the worst-case estimate.
    escapePositionsSize = _jsonParserState.WriteEscapePositionsTo(writePos + strLength);
    // Total storage key size: actual var-int prefix + string bytes + escape positions.
    idSize = escapePositionsSize + strLength + idSize;
    Slice.External(context.Allocator, ptr + maxStrSize + sizeDifference, idSize, out idSlice);
    Slice.External(context.Allocator, ptr, strLength, out lowerIdSlice);
    return(scope);
    UnlikelyUnicode:
    // Non-ASCII input: discard this allocation and let the unicode path build both keys.
    scope.Dispose();
    return(UnicodeGetLowerIdAndStorageKey(context, str, out lowerIdSlice, out idSlice, maxStrSize, escapePositionsSize));
}