/// <summary>Writes a range of a char array, encoded in UTF-8</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">Source array. When it is null and <paramref name="count"/> is 0, a Nil is written instead of an empty string</param>
/// <param name="offset">Index in <paramref name="value"/> of the first char to write</param>
/// <param name="count">Number of chars to write, starting at <paramref name="offset"/></param>
internal static unsafe void WriteChars(ref SliceWriter writer, char[] value, int offset, int count)
{
    Contract.Requires(offset >= 0 && count >= 0);

    if (count == 0)
    {
        if (value == null)
        { // "00"
            writer.WriteByte(FdbTupleTypes.Nil);
        }
        else
        { // "02 00"
            writer.WriteByte2(FdbTupleTypes.Utf8, 0x00);
        }
        return;
    }

    fixed (char* chars = value)
    {
        // fast path: succeeds when the range does not contain any \0
        if (TryWriteUnescapedUtf8String(ref writer, chars + offset, count))
        {
            return;
        }
    }

    // The range contains \0 chars, we need to do it the hard way.
    // The slow path must encode the same [offset, offset + count) range that the fast path looked at,
    // and not the first 'count' chars of the array.
    WriteNulEscapedBytes(ref writer, FdbTupleTypes.Utf8, Encoding.UTF8.GetBytes(value, offset, count));
}
/// <summary>Appends a signed 64-bit integer to the buffer, and advances the cursor</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">Signed 64-bit integer to encode</param>
/// <remarks>Zero and values that fit in a single byte (-255..255) are written inline; everything else is delegated to the slow path.</remarks>
public static void WriteInt64(ref SliceWriter writer, long value)
{
    if (value == 0)
    { // zero: type header only
        writer.WriteByte(FdbTupleTypes.IntZero);
    }
    else if (value > 0 && value <= 255)
    { // 1..255: frequent for array index
        writer.WriteByte2(FdbTupleTypes.IntPos1, (byte)value);
    }
    else if (value < 0 && value > -256)
    { // -255..-1: stored as (255 + value), which is in 0..254
        writer.WriteByte2(FdbTupleTypes.IntNeg1, (byte)(255 + value));
    }
    else
    { // needs more than one byte
        WriteInt64Slow(ref writer, value);
    }
}
/// <summary>Appends an unsigned 8-bit integer to the buffer, and advances the cursor</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">Unsigned byte (0..255) to encode</param>
public static void WriteInt8(ref SliceWriter writer, byte value)
{
    if (value != 0)
    { // 1..255: frequent for array index
        writer.WriteByte2(FdbTupleTypes.IntPos1, value);
        return;
    }

    // zero only needs the type header
    writer.WriteByte(FdbTupleTypes.IntZero);
}
/// <summary>Appends a binary string to the buffer</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">Bytes to write. A null array is written as Nil</param>
public static void WriteBytes(ref SliceWriter writer, byte[] value)
{
    if (value != null)
    {
        // the payload goes through the NUL-escaping writer, tagged as a byte string
        WriteNulEscapedBytes(ref writer, FdbTupleTypes.Bytes, value);
        return;
    }

    writer.WriteByte(FdbTupleTypes.Nil);
}
/// <summary>Appends an unsigned 64-bit integer to the buffer, and advances the cursor</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">Unsigned 64-bit integer to encode</param>
/// <remarks>Zero and 1..255 are written inline; larger values are delegated to the slow path.</remarks>
public static void WriteUInt64(ref SliceWriter writer, ulong value)
{
    if (value > 255)
    { // >= 256
        WriteUInt64Slow(ref writer, value);
        return;
    }

    if (value != 0)
    { // 1..255
        writer.WriteByte2(FdbTupleTypes.IntPos1, (byte)value);
    }
    else
    { // 0
        writer.WriteByte(FdbTupleTypes.IntZero);
    }
}
/// <summary>Appends a string, encoded in UTF-8, to the buffer</summary>
/// <param name="writer">Target buffer</param>
/// <param name="value">String to write. Null is written as Nil, and the empty string as an empty UTF-8 string</param>
public static unsafe void WriteString(ref SliceWriter writer, string value)
{
    if (value == null)
    { // "00"
        writer.WriteByte(FdbTupleTypes.Nil);
        return;
    }

    if (value.Length == 0)
    { // "02 00"
        writer.WriteByte2(FdbTupleTypes.Utf8, 0x00);
        return;
    }

    bool written;
    fixed (char* chars = value)
    {
        // fast path: succeeds when the string does not contain any \0
        written = TryWriteUnescapedUtf8String(ref writer, chars, value.Length);
    }

    if (!written)
    {
        // the string contains \0 chars, we need to do it the hard way
        WriteNulEscapedBytes(ref writer, FdbTupleTypes.Utf8, Encoding.UTF8.GetBytes(value));
    }
}
/// <summary>Appends a Nil marker to the buffer, and advances the cursor</summary>
/// <param name="writer">Target buffer</param>
public static void WriteNil(ref SliceWriter writer)
{
    // a null value is a single type byte, without any payload
    writer.WriteByte(FdbTupleTypes.Nil);
}
/// <summary>Attempts to write a non-empty sequence of chars as a UTF-8 string, without going through the NUL-escaping slow path</summary>
/// <param name="writer">Target buffer</param>
/// <param name="chars">Pointer to the first char to encode</param>
/// <param name="count">Number of chars to encode</param>
/// <returns>
/// True if the string has been written (type header, encoded bytes and terminator).
/// False if the chars contain at least one \0; in that case nothing has been written and the caller must use the escaping path.
/// </returns>
private static unsafe bool TryWriteUnescapedUtf8String(ref SliceWriter writer, char* chars, int count)
{
    Contract.Requires(chars != null && count >= 0);

    // Several observations:
    // * Most strings will be keywords or ASCII-only with no zeroes. These can be copied directly to the buffer
    // * We will only attempt to optimize strings that don't have any 00 to escape to 00 FF. For these, we will fallback to converting to byte[] then escaping.
    // * Since .NET's strings are UTF-16, the max possible UNICODE value to encode is 0xFFFF, which takes 3 bytes in UTF-8 (EF BF BF)
    // * Most western europe languages have only a few non-ASCII chars here and there, and most of them will only use 2 bytes (ex: 'é' => 'C3 A9')
    // * More complex scripts with dedicated symbol pages (kanjis, arabic, ....) will take 2 or 3 bytes for each character.

    // We will first do a pass to check for the presence of 00 and non-ASCII chars
    // => if we find at least one 00, we fallback to escaping the result of Encoding.UTF8.GetBytes()
    // => if we find only ASCII (1..127) chars, we have an optimized path that will truncate the chars to bytes
    // => if not, we will use a UTF-8 encoder to convert the string, in chunks, using a small buffer allocated on the stack

    // ---- First pass: look for \0 and non-ASCII chars ----

    // The fastest way to check for non-ASCII is to OR all the chars together, and look at bits 7 to 15.
    // If they are not all zero, there is at least ONE non-ASCII char.
    char* end = chars + count;
    int bits = 0;
    for (char* cursor = chars; cursor < end; ++cursor)
    {
        char current = *cursor;
        if (current == '\0')
        { // there is at least one \0 in the string: abort before anything is written
            return false;
        }
        bits |= current;
    }

    // bits 7-15 all unset means the string is pure ASCII
    if ((bits & ~0x7F) == 0)
    { // => directly dump the chars to the buffer
        WriteUnescapedAsciiChars(ref writer, chars, count);
        return true;
    }

    // ---- Second pass: encode the string to UTF-8, in chunks ----

    // Here we know that there is at least one non-ASCII char, and that there are no \0

    // We need at most 3 * CHUNK_SIZE to encode the chunk
    // > For small strings, the scratch buffer is sized for the whole string, and we will be done in one chunk
    // > For larger strings, we will call encoder.Convert(...) until it says it is done.
    const int CHUNK_SIZE = 1024;
    int scratchSize = Encoding.UTF8.GetMaxByteCount(Math.Min(count, CHUNK_SIZE));
    byte* scratch = stackalloc byte[scratchSize];

    // We can not really predict the final size of the encoded string, but:
    // * Western languages have a few chars that usually need 2 bytes. If we pre-allocate 50% more bytes, it should fit most of the time, without too much waste
    // * Eastern languages will have all chars encoded to 3 bytes. If we also pre-allocate 50% more, we should only need one resize of the buffer (150% x 2 = 300%), which is acceptable
    writer.EnsureBytes(checked(2 + count + (count >> 1))); // preallocate 150% of the string + 2 bytes
    writer.UnsafeWriteByte(FdbTupleTypes.Utf8);

    var encoder = Encoding.UTF8.GetEncoder();
    char* source = chars;
    int charsLeft = count;

    // note: encoder.Convert() tries to fill up the buffer as much as possible with complete chars,
    // and will set 'completed' to true when all chars have been converted.
    while (true)
    {
        int charsUsed, bytesUsed;
        bool completed;
        encoder.Convert(source, charsLeft, scratch, scratchSize, true, out charsUsed, out bytesUsed, out completed);

        if (bytesUsed > 0)
        {
            writer.WriteBytes(scratch, bytesUsed);
        }

        charsLeft -= charsUsed;
        source += charsUsed;

        if (completed)
        {
            break;
        }
    }
    Contract.Assert(charsLeft == 0 && source == end);

    // close the string
    writer.WriteByte(0x00);

    return true;
}
/// <summary>Writes all the entries of a level to the snapshot, and records the location of the level in the jump table</summary>
/// <param name="level">Index of the level. Its entry in the jump table must not have been filled yet</param>
/// <param name="segment">Pointers to the keys of the level. Entries that do not resolve to a value at the snapshot's sequence are skipped</param>
/// <param name="ct">Token used to cancel the operation; it is checked on entry and passed to the page writes</param>
/// <returns>Task that completes when the level has been buffered, and the trailing page padded</returns>
/// <exception cref="InvalidOperationException">If the level has already been written to this snapshot</exception>
/// <exception cref="ArgumentNullException">If <paramref name="segment"/> is null</exception>
public async Task WriteLevelAsync(int level, IntPtr[] segment, CancellationToken ct)
{
    ct.ThrowIfCancellationRequested();

    if (m_jumpTable[level].Value > 0)
    {
        throw new InvalidOperationException("The level has already be written to this snapshot");
    }

    // Reject a null segment BEFORE touching the writer: otherwise the failure would only surface at
    // 'segment.Length', after part of the level header has already been appended to the buffer.
    if (segment == null)
    {
        throw new ArgumentNullException("segment");
    }

    // absolute offset of the level = what is already in the file + what is still buffered in the writer
    var levelStart = checked(m_file.Length + (uint)m_writer.Position);
    //TODO: ensure that we start on a PAGE?

    // "LVL_"
    m_writer.WriteFixed32(SnapshotFormat.LEVEL_MAGIC_NUMBER);
    // Level Flags
    m_writer.WriteFixed32(0); //TODO: flags!
    // Level ID
    m_writer.WriteFixed32((uint)level);
    // Item count (always 2^level)
    m_writer.WriteFixed32((uint)segment.Length);

    for (int i = 0; i < segment.Length; i++)
    {
        // note: the unsafe block must not contain any 'await', so the flush is done after it
        unsafe
        {
#if __MonoCS__
            // NOTE(review): this branch copies Key and Value into locals via Marshal.PtrToStructure and then takes
            // the address of their Data field. Confirm against the Key/Value declarations that this is valid
            // (PtrToStructure target kind, and that 'size' bytes are really available behind the local copy).
            var valuePointer = new IntPtr((void*)MemoryDatabaseHandler.ResolveValueAtVersion(segment[i], m_sequence));

            if (valuePointer == IntPtr.Zero)
            { // no value at this sequence: skip the entry
                continue;
            }

            Value value = new Value();
            Marshal.PtrToStructure(valuePointer, value);

            var keyPointer = new IntPtr((void*)segment[i]);

            Key key = new Key();
            Marshal.PtrToStructure(keyPointer, key);

            Contract.Assert(key.Size <= MemoryDatabaseHandler.MAX_KEY_SIZE);

            // Key Size
            uint size = key.Size;
            m_writer.WriteVarint32(size);
            m_writer.WriteBytesUnsafe(&(key.Data), (int)size);

            // Value
            m_writer.WriteVarint64(value.Sequence); // sequence
            size = value.Size;
            if (size == 0)
            { // empty value
                m_writer.WriteByte(0);
            }
            else
            {
                m_writer.WriteVarint32(size); // value size
                m_writer.WriteBytesUnsafe(&(value.Data), (int)size); // value data
            }
#else
            Value* value = MemoryDatabaseHandler.ResolveValueAtVersion(segment[i], m_sequence);
            if (value == null)
            { // no value at this sequence: skip the entry
                continue;
            }
            Key* key = (Key*)segment[i]; //.ToPointer();

            Contract.Assert(key != null && key->Size <= MemoryDatabaseHandler.MAX_KEY_SIZE);

            // Key Size
            uint size = key->Size;
            m_writer.WriteVarint32(size);
            m_writer.WriteBytesUnsafe(&(key->Data), (int)size);

            // Value
            m_writer.WriteVarint64(value->Sequence); // sequence
            size = value->Size;
            if (size == 0)
            { // empty value
                m_writer.WriteByte(0);
            }
            else
            {
                m_writer.WriteVarint32(size); // value size
                m_writer.WriteBytesUnsafe(&(value->Data), (int)size); // value data
            }
#endif
        }

        // partial flush: once enough data is buffered, hand the complete pages to the file
        // and only discard from the writer what has actually been written
        if (m_writer.Position >= SnapshotFormat.FLUSH_SIZE)
        {
            int written = await m_file.WriteCompletePagesAsync(m_writer.Buffer, m_writer.Position, ct).ConfigureAwait(false);
            if (written > 0)
            {
                m_writer.Flush(written);
            }
        }
    }

    // level trailer
    m_writer.WriteFixed32(uint.MaxValue);
    //TODO: CRC? (would need to be computed on the fly, because we don't have the full slice in memory probably)
    m_writer.WriteFixed32(0);

    // the jump table stores (start offset, size in bytes) for the level
    var levelEnd = checked(m_file.Length + (uint)m_writer.Position);
    m_jumpTable[level] = new KeyValuePair<ulong, ulong>(levelStart, levelEnd - levelStart);

    // optional padding to fill the rest of the page
    PadPageIfNeeded(SnapshotFormat.PAGE_SIZE, (byte)(0xFC - level));
}