Esempio n. 1
0
        /// <summary>Merge an array of keys with a same prefix, all sharing the same buffer</summary>
        /// <param name="prefix">Prefix shared by all keys. It is only written when <c>prefix.IsPresent</c> is true.</param>
        /// <param name="keys">Array of keys to pack</param>
        /// <returns>Result of <c>FdbKey.SplitIntoSegments</c> applied to the single buffer holding every prefixed key, using the end offset recorded after each key</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="prefix"/> compares equal to null, or if <paramref name="keys"/> is null</exception>
        /// <exception cref="OverflowException">If the total size of all the prefixed keys does not fit in an <see cref="int"/></exception>
        public static Slice[] Merge(Slice prefix, [NotNull] Slice[] keys)
        {
            if (prefix == null)
            {
                throw new ArgumentNullException(nameof(prefix));
            }
            if (keys == null)
            {
                throw new ArgumentNullException(nameof(keys));
            }

            //REVIEW: merge this code with Slice.ConcatRange!

            // we can pre-allocate exactly the buffer by computing the total size of all keys
            // note: Enumerable.Sum() is already overflow-checked; the multiplication and the addition must be checked as well,
            // or else a wrapped-around value would be silently used as the initial capacity of the writer.
            int size   = checked(keys.Sum(key => key.Count) + keys.Length * prefix.Count);
            var writer = new SliceWriter(size);
            var next   = new List<int>(keys.Length);

            //TODO: use multiple buffers if item count is huge ?

            // the prefix does not change between iterations, so test it only once
            bool hasPrefix = prefix.IsPresent;

            foreach (var key in keys)
            {
                if (hasPrefix)
                {
                    writer.WriteBytes(prefix);
                }
                writer.WriteBytes(key);
                // remember where this key ends, so that the buffer can be split back into one segment per key
                next.Add(writer.Position);
            }

            return FdbKey.SplitIntoSegments(writer.Buffer, 0, next);
        }
        /// <summary>Merge an array of keys with a same prefix, all sharing the same buffer</summary>
        /// <param name="prefix">Prefix shared by all keys. It is only written when its span is not empty.</param>
        /// <param name="keys">Array of keys to pack</param>
        /// <returns>Array of slices (for all keys) that share the same underlying buffer</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="prefix"/> is a null slice. A null <paramref name="keys"/> is rejected by <c>Contract.NotNull</c> (presumably with the same exception type; verify against its definition).</exception>
        /// <exception cref="OverflowException">If the total size of all the prefixed keys does not fit in an <see cref="int"/></exception>
        public static Slice[] Merge(Slice prefix, Slice[] keys)
        {
            if (prefix.IsNull)
            {
                throw new ArgumentNullException(nameof(prefix));
            }
            Contract.NotNull(keys);

            //REVIEW: merge this code with Slice.ConcatRange!

            // we can pre-allocate exactly the buffer by computing the total size of all keys
            // note: Enumerable.Sum() is already overflow-checked; the multiplication and the addition must be checked as well,
            // or else a wrapped-around value would be silently used as the initial capacity of the writer.
            int size   = checked(keys.Sum(key => key.Count) + keys.Length * prefix.Count);
            var writer = new SliceWriter(size);
            var next   = new List<int>(keys.Length);

            //TODO: use multiple buffers if item count is huge ?

            // read the span once, outside of the loop
            var prefixSpan = prefix.Span;

            foreach (var key in keys)
            {
                if (prefixSpan.Length != 0)
                {
                    writer.WriteBytes(prefixSpan);
                }
                writer.WriteBytes(key.Span);
                // remember where this key ends, so that the buffer can be split back into one segment per key
                next.Add(writer.Position);
            }

            return SplitIntoSegments(writer.Buffer, 0, next);
        }
Esempio n. 3
0
        /// <summary>Create a new writer that already contains the key prefix returned by <c>GetKeyPrefix()</c></summary>
        /// <param name="extra">Number of bytes to reserve in the initial buffer, in addition to the size of the prefix</param>
        /// <returns>Writer whose content starts with the key prefix</returns>
        public SliceWriter OpenWriter(int extra = 32)
        {
            var prefix = GetKeyPrefix();

            // initial capacity: room for the prefix itself, plus the extra bytes requested by the caller
            int capacity = prefix.Count + extra;

            //TODO: BufferPool ?
            var writer = new SliceWriter(capacity);
            writer.WriteBytes(prefix);

            return writer;
        }
		/// <summary>Attempt to write a string that does not contain any NUL (\0) character, without going through the escaping path</summary>
		/// <param name="writer">Destination writer</param>
		/// <param name="chars">Pointer to the first UTF-16 char of the string (must not be null)</param>
		/// <param name="count">Number of chars to encode (must not be negative)</param>
		/// <returns>
		/// False if the string contains at least one \0, in which case nothing has been written and the caller has to handle the string another way.
		/// True if the string was written: either by <c>WriteUnescapedAsciiChars</c> when all the chars are ASCII, or as the <c>FdbTupleTypes.Utf8</c> type byte, followed by the UTF-8 bytes, followed by a 0x00 terminator.
		/// </returns>
		private static unsafe bool TryWriteUnescapedUtf8String(ref SliceWriter writer, char* chars, int count)
		{
			Contract.Requires(chars != null && count >= 0);

			// Several observations:
			// * Most strings will be keywords or ASCII-only with no zeroes. These can be copied directly to the buffer
			// * We will only attempt to optimize strings that don't have any 00 to escape to 00 FF. For these, we will fallback to converting to byte[] then escaping.
			// * Since .NET's strings are UTF-16, the max possible UNICODE value to encode is 0xFFFF, which takes 3 bytes in UTF-8 (EF BF BF)
			// * Most western europe languages have only a few non-ASCII chars here and there, and most of them will only use 2 bytes (ex: 'é' => 'C3 A9')
			// * More complex scripts with dedicated symbol pages (kanjis, arabic, ....) will take 2 or 3 bytes for each character.

			// We will first do a pass to check for the presence of 00 and non-ASCII chars
			// => if we find at least one 00, we fallback to escaping the result of Encoding.UTF8.GetBytes()
			// => if we find only ASCII (1..127) chars, we have an optimized path that will truncate the chars to bytes
			// => if not, we will use an UTF8Encoder to convert the string to UTF-8, in chunks, using a small buffer allocated on the stack

			#region First pass: look for \0 and non-ASCII chars

			// fastest way to check for non-ASCII, is to OR all the chars together, and look at bits 7 to 15. If they are not all zero, there is at least ONE non-ASCII char.
			// also, we abort as soon as we find a \0

			char* ptr = chars;
			char* end = chars + count;
			char mask = '\0', c;
			while (ptr < end && (c = *ptr) != '\0') { mask |= c; ++ptr; }

			// the scan only stops before 'end' when it hits a \0
			if (ptr < end) return false; // there is at least one \0 in the string

			// bit 7-15 all unset means the string is pure ASCII
			if ((mask >> 7) == 0)
			{ // => directly dump the chars to the buffer
				WriteUnescapedAsciiChars(ref writer, chars, count);
				return true;
			}

			#endregion

			#region Second pass: encode the string to UTF-8, in chunks

			// Here we know that there is at least one unicode char, and that there are no \0
			// We will iterate through the string, filling as much of the buffer as possible

			bool done;
			int charsUsed, bytesUsed;
			int remaining = count;
			ptr = chars;

			// The temporary buffer is sized by Encoding.UTF8.GetMaxByteCount(...) for at most CHUNK_SIZE chars
			// > For small strings (up to CHUNK_SIZE chars), the buffer can hold the whole encoded string, and we will be done in one chunk
			// > For larger strings, we will call encoder.Convert(...) until it says it is done.
			const int CHUNK_SIZE = 1024;
			int bufLen = Encoding.UTF8.GetMaxByteCount(Math.Min(count, CHUNK_SIZE));
			byte* buf = stackalloc byte[bufLen];

			// We can not really predict the final size of the encoded string, but:
			// * Western languages have a few chars that usually need 2 bytes. If we pre-allocate 50% more bytes, it should fit most of the time, without too much waste
			// * Eastern languages will have all chars encoded to 3 bytes. If we also pre-allocate 50% more, we should only need one resize of the buffer (150% x 2 = 300%), which is acceptable
			writer.EnsureBytes(checked(2 + count + (count >> 1))); // preallocate 150% of the string + 2 bytes
			// type header of the string (space for it was reserved by the EnsureBytes() call above)
			writer.UnsafeWriteByte(FdbTupleTypes.Utf8);

			var encoder = Encoding.UTF8.GetEncoder();
			// note: encoder.Convert() tries to fill up the buffer as much as possible with complete chars, and will set 'done' to true when all chars have been converted.
			do
			{
				encoder.Convert(ptr, remaining, buf, bufLen, true, out charsUsed, out bytesUsed, out done);
				if (bytesUsed > 0)
				{
					writer.WriteBytes(buf, bytesUsed);
				}
				// advance past the chars that were consumed by this chunk
				remaining -= charsUsed;
				ptr += charsUsed;
			}
			while (!done);
			Contract.Assert(remaining == 0 && ptr == end);

			// close the string
			writer.WriteByte(0x00);

			#endregion

			return true;
		}