/// <summary>
/// Writes a run of bytes in the byte-array wire format: an array header that carries the
/// element count, the <see cref="byte"/> element-type marker, and then the raw bytes.
/// The data is supplied as a <see cref="ReadOnlySpan{T}"/> of <see cref="byte"/>.
/// </summary>
/// <param name="span">The bytes that form the array contents.</param>
public void WriteValue(ReadOnlySpan<byte> span)
{
    int count = span.Length;

    // Lengths 0 through 3 each have a dedicated header kind; any other length is written as the
    // general array kind followed by the compressed element count.
    if (count == 0)
    {
        _writer.Write((byte)EncodingKind.Array_0);
    }
    else if (count == 1)
    {
        _writer.Write((byte)EncodingKind.Array_1);
    }
    else if (count == 2)
    {
        _writer.Write((byte)EncodingKind.Array_2);
    }
    else if (count == 3)
    {
        _writer.Write((byte)EncodingKind.Array_3);
    }
    else
    {
        _writer.Write((byte)EncodingKind.Array);
        WriteCompressedUInt((uint)count);
    }

    // Element type marker: the elements are always bytes.
    var byteType = typeof(byte);
    LorettaDebug.Assert(s_typeMap[byteType] == EncodingKind.UInt8);
    WritePrimitiveType(byteType, EncodingKind.UInt8);

#if NETCOREAPP
    _writer.Write(span);
#else
    // BinaryWriter on .NET Framework has no ReadOnlySpan<byte> overload, so the data is staged
    // through a scratch array. The scratch array is capped at 8K so it never lands in the large
    // object heap.
    var chunk = new byte[Math.Min(count, 8192)];
    int written = 0;
    while (written < count)
    {
        int chunkLength = Math.Min(chunk.Length, count - written);
        span.Slice(written, chunkLength).CopyTo(chunk.AsSpan());
        _writer.Write(chunk, 0, chunkLength);
        written += chunk.Length;
    }
#endif
}
/// <summary>
/// Computes the maximum number of chars that decoding the whole stream could produce.
/// </summary>
/// <param name="encoding">The encoding that will be used to decode the stream.</param>
/// <param name="stream">The stream to measure; it is asserted to be seekable.</param>
/// <returns>The maximum char count for the stream's full length.</returns>
/// <exception cref="IOException">Stream is so big that max char count can't fit in <see cref="int"/>.</exception>
internal static int GetMaxCharCountOrThrowIfHuge(this Encoding encoding, Stream stream)
{
    LorettaDebug.Assert(stream.CanSeek);

    if (!encoding.TryGetMaxCharCount(stream.Length, out int maxCharCount))
    {
        // The message comes from whichever resource class the current build flavor provides.
#if CODE_STYLE
        throw new IOException(CodeStyleResources.Stream_is_too_long);
#elif WORKSPACE
        throw new IOException(WorkspacesResources.Stream_is_too_long);
#else
        throw new IOException(CodeAnalysisResources.StreamIsTooLong);
#endif
    }

    return maxCharCount;
}
/// <summary>
/// Initializes an <see cref="ObjectWriter"/> over the given stream and immediately writes the
/// version header.
/// </summary>
/// <param name="stream">The stream to write to.</param>
/// <param name="leaveOpen">True to leave the <paramref name="stream"/> open after the <see cref="ObjectWriter"/> is disposed.</param>
/// <param name="cancellationToken">Cancellation token.</param>
public ObjectWriter(
    Stream stream,
    bool leaveOpen = false,
    CancellationToken cancellationToken = default)
{
    // The string serialization format assumes reader and writer share the same endianness.
    // Big-endian support could be added if it is ever needed.
    LorettaDebug.Assert(BitConverter.IsLittleEndian);

    _writer = new BinaryWriter(stream, Encoding.UTF8, leaveOpen: leaveOpen);
    _cancellationToken = cancellationToken;

    // Objects are tracked by reference; strings are tracked by value.
    _objectReferenceMap = new WriterReferenceMap(valueEquality: false);
    _stringReferenceMap = new WriterReferenceMap(valueEquality: true);

    // Take a private copy of the static binder state up front so that no locks have to be
    // touched while writing.
    _binderSnapshot = ObjectBinder.GetSnapshot();

    WriteVersion();
}
/// <summary>
/// Initializes an <see cref="ObjectReader"/> over the given stream.
/// </summary>
/// <param name="stream">The stream to read from.</param>
/// <param name="leaveOpen">Forwarded to the underlying <see cref="BinaryReader"/> as its leave-open flag.</param>
/// <param name="cancellationToken">Cancellation token stored for later use by the reader.</param>
private ObjectReader(
    Stream stream,
    bool leaveOpen,
    CancellationToken cancellationToken)
{
    // The string serialization format assumes reader and writer share the same endianness.
    // Big-endian support could be added if it is ever needed.
    LorettaDebug.Assert(BitConverter.IsLittleEndian);

    _reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: leaveOpen);
    _cancellationToken = cancellationToken;

    _objectReferenceMap = ReaderReferenceMap<object>.Create();
    _stringReferenceMap = ReaderReferenceMap<string>.Create();

    // Take a private copy of the static binder state up front so that no locks have to be
    // touched while reading.
    _binderSnapshot = ObjectBinder.GetSnapshot();
}
/// <summary>
/// Reads one value by delegating to <c>ReadValueWorker</c>, tracking the recursion depth and
/// moving the work onto a separate long-running task each time the depth reaches a multiple of
/// <c>ObjectWriter.MaxRecursionDepth</c>.
/// </summary>
/// <returns>The value produced by <c>ReadValueWorker</c>.</returns>
public object ReadValue()
{
    var depthOnEntry = _recursionDepth;
    _recursionDepth = depthOnEntry + 1;

    object result;
    if (_recursionDepth % ObjectWriter.MaxRecursionDepth != 0)
    {
        // Common case: still shallow enough to keep reading on the current thread.
        result = ReadValueWorker();
    }
    else
    {
        // The recursion has become deep; continue on another thread so the current stack
        // is not exhausted.
        var worker = Task.Factory.StartNew(
            ReadValueWorker,
            _cancellationToken,
            TaskCreationOptions.LongRunning,
            TaskScheduler.Default);

        // Block until the worker is done. Once a read returns, the stream that exposes the raw
        // memory is closed; if that happened while the worker was still reading, it could touch
        // unmapped memory and trigger an access violation.
        //
        // CANCELLATION: never implement cancellation by cancelling this wait. The only safe way is
        // to make 'worker' itself cancel promptly so that this wait can finish.
        result = worker.GetAwaiter().GetResult();
    }

    _recursionDepth--;
    LorettaDebug.Assert(depthOnEntry == _recursionDepth);

    return result;
}
/// <summary>
/// Determines whether <paramref name="text"/> matches <paramref name="ascii"/> element by
/// element, comparing each char of the string with the byte at the same index.
/// </summary>
/// <param name="text">The string to compare.</param>
/// <param name="ascii">The bytes to compare against; debug builds assert that every byte has its high bit clear.</param>
/// <returns>True when both have the same length and every position matches; otherwise false.</returns>
internal static bool TextEqualsASCII(string text, ReadOnlySpan<byte> ascii)
{
#if DEBUG
    foreach (var asciiByte in ascii)
    {
        LorettaDebug.Assert((asciiByte & 0x80) == 0, $"The {nameof(ascii)} input to this method must be valid ASCII.");
    }
#endif

    var count = ascii.Length;
    if (count != text.Length)
    {
        return false;
    }

    // Advance while the two inputs agree; reaching the end means no mismatch was found.
    var index = 0;
    while (index < count && ascii[index] == text[index])
    {
        index++;
    }

    return index == count;
}
/// <summary>
/// Writes a single value, choosing the encoding from the value's runtime type: null, the
/// primitive types, <see cref="decimal"/>, <see cref="DateTime"/>, <see cref="string"/>,
/// one-dimensional arrays and <see cref="Encoding"/> instances each have their own form;
/// anything else is handed to <c>WriteObject</c>.
/// </summary>
/// <param name="value">The value to write; may be null. Enum values are asserted against.</param>
/// <exception cref="InvalidOperationException">The value is an array with more than one dimension.</exception>
public void WriteValue(object? value)
{
    LorettaDebug.Assert(value == null || !value.GetType().GetTypeInfo().IsEnum, "Enum should not be written with WriteValue. Write them as ints instead.");

    if (value == null)
    {
        _writer.Write((byte)EncodingKind.Null);
        return;
    }

    var runtimeType = value.GetType();
    var runtimeTypeInfo = runtimeType.GetTypeInfo();
    LorettaDebug.Assert(!runtimeTypeInfo.IsEnum, "Enums should not be written with WriteObject. Write them out as integers instead.");

    // Perf: the JIT turns each 'value.GetType() == typeof(T)' test into a single register
    // comparison, and the tests are ordered by how common each type is, so both the form and
    // the order of the checks below are deliberate.

    // The primitive types are
    // Boolean, Byte, SByte, Int16, UInt16, Int32, UInt32,
    // Int64, UInt64, IntPtr, UIntPtr, Char, Double, and Single.
    if (runtimeTypeInfo.IsPrimitive)
    {
        // int, double, bool and char are tested first: they are the usual types of literals in
        // code and therefore the most likely hits when a primitive is being serialized.
        if (value.GetType() == typeof(int))
        {
            WriteEncodedInt32((int)value);
            return;
        }

        if (value.GetType() == typeof(double))
        {
            _writer.Write((byte)EncodingKind.Float8);
            _writer.Write((double)value);
            return;
        }

        if (value.GetType() == typeof(bool))
        {
            var booleanKind = (bool)value ? EncodingKind.Boolean_True : EncodingKind.Boolean_False;
            _writer.Write((byte)booleanKind);
            return;
        }

        if (value.GetType() == typeof(char))
        {
            _writer.Write((byte)EncodingKind.Char);

            // Emitted as a ushort because BinaryWriter fails on chars that are unicode surrogates.
            _writer.Write((ushort)(char)value);
            return;
        }

        if (value.GetType() == typeof(byte))
        {
            _writer.Write((byte)EncodingKind.UInt8);
            _writer.Write((byte)value);
            return;
        }

        if (value.GetType() == typeof(short))
        {
            _writer.Write((byte)EncodingKind.Int16);
            _writer.Write((short)value);
            return;
        }

        if (value.GetType() == typeof(long))
        {
            _writer.Write((byte)EncodingKind.Int64);
            _writer.Write((long)value);
            return;
        }

        if (value.GetType() == typeof(sbyte))
        {
            _writer.Write((byte)EncodingKind.Int8);
            _writer.Write((sbyte)value);
            return;
        }

        if (value.GetType() == typeof(float))
        {
            _writer.Write((byte)EncodingKind.Float4);
            _writer.Write((float)value);
            return;
        }

        if (value.GetType() == typeof(ushort))
        {
            _writer.Write((byte)EncodingKind.UInt16);
            _writer.Write((ushort)value);
            return;
        }

        if (value.GetType() == typeof(uint))
        {
            WriteEncodedUInt32((uint)value);
            return;
        }

        if (value.GetType() == typeof(ulong))
        {
            _writer.Write((byte)EncodingKind.UInt64);
            _writer.Write((ulong)value);
            return;
        }

        // Any remaining primitive type has no encoding here.
        throw ExceptionUtilities.UnexpectedValue(value.GetType());
    }

    if (value.GetType() == typeof(decimal))
    {
        _writer.Write((byte)EncodingKind.Decimal);
        _writer.Write((decimal)value);
        return;
    }

    if (value.GetType() == typeof(DateTime))
    {
        _writer.Write((byte)EncodingKind.DateTime);
        _writer.Write(((DateTime)value).ToBinary());
        return;
    }

    if (value.GetType() == typeof(string))
    {
        WriteStringValue((string)value);
        return;
    }

    if (runtimeType.IsArray)
    {
        var array = (Array)value;
        if (array.Rank > 1)
        {
            throw new InvalidOperationException(Resources.Arrays_with_more_than_one_dimension_cannot_be_serialized);
        }

        WriteArray(array);
        return;
    }

    if (value is Encoding encoding)
    {
        WriteEncoding(encoding);
        return;
    }

    WriteObject(instance: value, instanceAsWritable: null);
}
/// <summary> /// Merges the new change ranges into the old change ranges, adjusting the new ranges to be with respect to the original text /// (with neither old or new changes applied) instead of with respect to the original text after "old changes" are applied. /// /// This may require splitting, concatenation, etc. of individual change ranges. /// </summary> /// <remarks> /// Both `oldChanges` and `newChanges` must contain non-overlapping spans in ascending order. /// </remarks> public static ImmutableArray <TextChangeRange> Merge(ImmutableArray <TextChangeRange> oldChanges, ImmutableArray <TextChangeRange> newChanges) { // Earlier steps are expected to prevent us from ever reaching this point with empty change sets. if (oldChanges.IsEmpty) { throw new ArgumentException($"'{nameof(oldChanges)}' must not be empty.", nameof(oldChanges)); } if (newChanges.IsEmpty) { throw new ArgumentException($"'{nameof(newChanges)}' must not be empty.", nameof(newChanges)); } var builder = ArrayBuilder <TextChangeRange> .GetInstance(); var oldChange = oldChanges[0]; var newChange = new UnadjustedNewChange(newChanges[0]); var oldIndex = 0; var newIndex = 0; // The sum of characters inserted by old changes minus characters deleted by old changes. // This value must be adjusted whenever characters from an old change are added to `builder`. var oldDelta = 0; // In this loop we "zip" together potentially overlapping old and new changes. // It's important that when overlapping changes are found, we don't consume past the end of the overlapping section until the next iteration. // so that we don't miss scenarios where the section after the overlap we found itself overlaps with another change // e.g.: // [-------oldChange1------] // [--newChange1--] [--newChange2--] while (true) { if (oldChange.Span.Length == 0 && oldChange.NewLength == 0) { // old change does not insert or delete any characters, so it can be dropped to no effect. 
if (tryGetNextOldChange()) { continue; } else { break; } } else if (newChange.SpanLength == 0 && newChange.NewLength == 0) { // new change does not insert or delete any characters, so it can be dropped to no effect. if (tryGetNextNewChange()) { continue; } else { break; } } else if (newChange.SpanEnd <= oldChange.Span.Start + oldDelta) { // new change is entirely before old change, so just take the new change // old[--------] // new[--------] adjustAndAddNewChange(builder, oldDelta, newChange); if (tryGetNextNewChange()) { continue; } else { break; } } else if (newChange.SpanStart >= oldChange.NewEnd() + oldDelta) { // new change is entirely after old change, so just take the old change // old[--------] // new[--------] addAndAdjustOldDelta(builder, ref oldDelta, oldChange); if (tryGetNextOldChange()) { continue; } else { break; } } else if (newChange.SpanStart < oldChange.Span.Start + oldDelta) { // new change starts before old change, but the new change deletion overlaps with the old change insertion // note: 'd' represents a deleted character, 'a' represents a character inserted by an old change, and 'b' represents a character inserted by a new change. 
// // old|dddddd| // |aaaaaa| // --------------- // new|dddddd| // |bbbbbb| // align the new change and old change start by consuming the part of the new deletion before the old change // (this only deletes characters of the original text) // // old|dddddd| // |aaaaaa| // --------------- // new|ddd| // |bbbbbb| var newChangeLeadingDeletion = oldChange.Span.Start + oldDelta - newChange.SpanStart; adjustAndAddNewChange(builder, oldDelta, new UnadjustedNewChange(newChange.SpanStart, newChangeLeadingDeletion, newLength: 0)); newChange = new UnadjustedNewChange(oldChange.Span.Start + oldDelta, newChange.SpanLength - newChangeLeadingDeletion, newChange.NewLength); continue; } else if (newChange.SpanStart > oldChange.Span.Start + oldDelta) { // new change starts after old change, but overlaps // // old|dddddd| // |aaaaaa| // --------------- // new|dddddd| // |bbbbbb| // align the old change to the new change by consuming the part of the old change which is before the new change. // // old|ddd| // |aaa| // --------------- // new|dddddd| // |bbbbbb| var oldChangeLeadingInsertion = newChange.SpanStart - (oldChange.Span.Start + oldDelta); // we must make sure to delete at most as many characters as the entire oldChange deletes var oldChangeLeadingDeletion = Math.Min(oldChange.Span.Length, oldChangeLeadingInsertion); addAndAdjustOldDelta(builder, ref oldDelta, new TextChangeRange(new TextSpan(oldChange.Span.Start, oldChangeLeadingDeletion), oldChangeLeadingInsertion)); oldChange = new TextChangeRange(new TextSpan(newChange.SpanStart - oldDelta, oldChange.Span.Length - oldChangeLeadingDeletion), oldChange.NewLength - oldChangeLeadingInsertion); continue; } else { // old and new change start at same adjusted position LorettaDebug.Assert(newChange.SpanStart == oldChange.Span.Start + oldDelta); if (newChange.SpanLength <= oldChange.NewLength) { // new change deletes fewer characters than old change inserted // // old|dddddd| // |aaaaaa| // --------------- // new|ddd| // |bbbbbb| 
// - apply the new change deletion to the old change insertion // // old|dddddd| // |aaa| // --------------- // new|| // |bbbbbb| // // - move the new change insertion forward by the same amount as its consumed deletion to remain aligned with the old change. // (because the old change and new change have the same adjusted start position, the new change insertion appears directly before the old change insertion in the final text) // // old|dddddd| // |aaa| // --------------- // new|| // |bbbbbb| oldChange = new TextChangeRange(oldChange.Span, oldChange.NewLength - newChange.SpanLength); // the new change deletion is equal to the subset of the old change insertion that we are consuming this iteration oldDelta += newChange.SpanLength; // since the new change insertion occurs before the old change, consume it now newChange = new UnadjustedNewChange(newChange.SpanEnd, spanLength: 0, newChange.NewLength); adjustAndAddNewChange(builder, oldDelta, newChange); if (tryGetNextNewChange()) { continue; } else { break; } } else { // new change deletes more characters than old change inserted // // old|d| // |aa| // --------------- // new|ddd| // |bbb| // merge the old change into the new change: // - new change deletion deletes all of the old change insertion. reduce the new change deletion accordingly // // old|d| // || // --------------- // new|d| // |bbb| // // - old change deletion is simply added to the new change deletion. 
// // old|| // || // --------------- // new|dd| // |bbb| // // - new change is moved to put its adjusted position equal to the old change we just merged in // // old|| // || // --------------- // new|dd| // |bbb| // adjust the oldDelta to reflect that the old change has been consumed oldDelta = oldDelta - oldChange.Span.Length + oldChange.NewLength; var newDeletion = newChange.SpanLength + oldChange.Span.Length - oldChange.NewLength; newChange = new UnadjustedNewChange(oldChange.Span.Start + oldDelta, newDeletion, newChange.NewLength); if (tryGetNextOldChange()) { continue; } else { break; } } } } // there may be remaining old changes or remaining new changes (not both, and not neither) switch (oldIndex == oldChanges.Length, newIndex == newChanges.Length) {