/// <summary>
/// Decodes the content of <paramref name="stream"/> into a <see cref="LargeText"/> built from character chunks.
/// </summary>
/// <param name="stream">Stream to decode. It is rewound to its beginning before reading and is left open.</param>
/// <param name="encoding">Encoding handed to the <see cref="StreamReader"/>; byte order mark detection is enabled, so the reader may pick a different one.</param>
/// <param name="checksumAlgorithm">Hash algorithm used to compute the checksum over the stream.</param>
/// <param name="throwIfBinaryDetected">Passed through to <c>ReadChunksFromTextReader</c>.</param>
/// <param name="canBeEmbedded">When true, a blob is created from the stream via <c>EmbeddedText.CreateBlob</c> and stored with the text.</param>
/// <returns>The result of <c>From(string.Empty, ...)</c> for a zero-length stream; otherwise a new <see cref="LargeText"/>.</returns>
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    stream.Seek(0, SeekOrigin.Begin);

    long longLength = stream.Length;
    if (longLength == 0)
    {
        return From(string.Empty, encoding, checksumAlgorithm);
    }

    var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);
    Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
    int length = (int)longLength;

    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
    {
        var chunks = ReadChunksFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected);

        // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
        // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
        var checksum = CalculateChecksum(stream, checksumAlgorithm);

        // Honor canBeEmbedded instead of always passing an empty blob.
        var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default;

        return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
    }
}
/// <summary>
/// Reads <paramref name="stream"/> from its beginning and produces a chunk-backed <see cref="LargeText"/>.
/// </summary>
/// <param name="stream">Source stream; repositioned to offset zero first and left open afterwards.</param>
/// <param name="encoding">Encoding given to the reader; byte order mark detection is turned on.</param>
/// <param name="checksumAlgorithm">Algorithm used for the checksum of the stream.</param>
/// <param name="throwIfBinaryDetected">Forwarded to <c>ReadChunksFromTextReader</c>.</param>
/// <param name="canBeEmbedded">When true, an embedded-text blob is created from the stream.</param>
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    stream.Seek(0, SeekOrigin.Begin);

    long byteCount = stream.Length;
    if (byteCount == 0)
    {
        // Nothing to decode.
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    var charCountEstimate = encoding.GetMaxCharCountOrThrowIfHuge(stream);

    // GetMaxCharCountOrThrowIfHuge should have thrown for anything that does not fit in an int.
    Debug.Assert(byteCount > 0 && byteCount <= int.MaxValue);

    int readerBufferSize = Math.Min((int)byteCount, 4096);

    using (var reader = new StreamReader(stream, encoding, true, readerBufferSize, true))
    {
        var chunks = ReadChunksFromTextReader(reader, charCountEstimate, throwIfBinaryDetected);

        // The checksum and the embedded blob have to come from the original bytes:
        // re-encoding the decoded text is not guaranteed to round-trip.
        var checksum = CalculateChecksum(stream, checksumAlgorithm);

        ImmutableArray<byte> blob;
        if (canBeEmbedded)
        {
            blob = EmbeddedText.CreateBlob(stream);
        }
        else
        {
            blob = default(ImmutableArray<byte>);
        }

        return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, blob);
    }
}
/// <summary>
/// Reads <paramref name="stream"/> from its beginning, chunk by chunk, and produces a <see cref="LargeText"/>.
/// </summary>
/// <param name="stream">Source stream; repositioned to offset zero first and left open afterwards.</param>
/// <param name="encoding">Encoding given to the reader; byte order mark detection is turned on.</param>
/// <param name="checksumAlgorithm">Algorithm used for the checksum of the stream.</param>
/// <param name="throwIfBinaryDetected">When true, every chunk is checked with <c>IsBinary</c> and an <see cref="InvalidDataException"/> is thrown on a hit.</param>
/// <param name="canBeEmbedded">When true, an embedded-text blob is created from the stream.</param>
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    stream.Seek(0, SeekOrigin.Begin);

    long byteCount = stream.Length;
    if (byteCount == 0)
    {
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    var charsLeftEstimate = encoding.GetMaxCharCountOrThrowIfHuge(stream);

    // GetMaxCharCountOrThrowIfHuge should have thrown for anything that does not fit in an int.
    Debug.Assert(byteCount > 0 && byteCount <= int.MaxValue);

    int readerBufferSize = Math.Min((int)byteCount, 4096);

    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: readerBufferSize, leaveOpen: true))
    {
        var chunkList = ArrayBuilder<char[]>.GetInstance(1 + charsLeftEstimate / ChunkSize);

        while (!reader.EndOfStream)
        {
            // The estimate typically overshoots a little. Once less than a full chunk is
            // expected, first fill a slightly smaller (estimate - 64) chunk and then fall
            // back to 64-char tail chunks, which are likely to be resized.
            var capacity = charsLeftEstimate < ChunkSize
                ? Math.Max(charsLeftEstimate - 64, 64)
                : ChunkSize;

            char[] buffer = new char[capacity];
            int filled = reader.ReadBlock(buffer, 0, buffer.Length);
            if (filled == 0)
            {
                break;
            }

            charsLeftEstimate -= filled;

            // Trim a partially filled chunk down to what was actually read.
            if (filled < buffer.Length)
            {
                Array.Resize(ref buffer, filled);
            }

            // Check for binary files
            if (throwIfBinaryDetected && IsBinary(buffer))
            {
                throw new InvalidDataException();
            }

            chunkList.Add(buffer);
        }

        // The checksum and the embedded blob have to come from the original bytes:
        // re-encoding the decoded text is not guaranteed to round-trip.
        var checksum = CalculateChecksum(stream, checksumAlgorithm);

        ImmutableArray<byte> blob;
        if (canBeEmbedded)
        {
            blob = EmbeddedText.CreateBlob(stream);
        }
        else
        {
            blob = default(ImmutableArray<byte>);
        }

        return new LargeText(chunkList.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm, blob);
    }
}
/// <summary>
/// Constructs a <see cref="SourceText"/> from stream content.
/// </summary>
/// <param name="stream">Stream. The stream must be seekable.</param>
/// <param name="encoding">
/// Data encoding to use if the stream doesn't start with Byte Order Mark specifying the encoding.
/// <see cref="Encoding.UTF8"/> if not specified.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
/// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
/// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="stream"/> doesn't support reading or seeking.
/// <paramref name="checksumAlgorithm"/> is not supported.
/// </exception>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
/// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
/// <exception cref="IOException">An I/O error occurs.</exception>
/// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
public static SourceText From(
    Stream stream,
    Encoding encoding = null,
    SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
    bool throwIfBinaryDetected = false,
    bool canBeEmbedded = false)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    if (!stream.CanRead || !stream.CanSeek)
    {
        // Pass a real message and report the offending parameter through ParamName.
        throw new ArgumentException(CodeAnalysisResources.StreamMustSupportReadAndSeek, nameof(stream));
    }

    ValidateChecksumAlgorithm(checksumAlgorithm);

    encoding = encoding ?? s_utf8EncodingWithNoBOM;

    // If the resulting string would end up on the large object heap, then use LargeText.
    if (encoding.GetMaxCharCountOrThrowIfHuge(stream) >= LargeObjectHeapLimitInChars)
    {
        return LargeText.Decode(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
    }

    string text = Decode(stream, encoding, out encoding);
    if (throwIfBinaryDetected && IsBinary(text))
    {
        throw new InvalidDataException();
    }

    // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
    // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
    var checksum = CalculateChecksum(stream, checksumAlgorithm);

    // Honor canBeEmbedded instead of always passing an empty blob.
    var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default;

    return new StringText(text, encoding, checksum, checksumAlgorithm, embeddedTextBlob);
}
/// <summary>
/// Creates a <see cref="SourceText"/> by decoding the content of a stream.
/// </summary>
/// <param name="stream">The stream to read. It must support both reading and seeking.</param>
/// <param name="encoding">
/// Encoding applied when the stream does not begin with a Byte Order Mark that identifies one.
/// Defaults to <see cref="Encoding.UTF8"/> when omitted.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm for the checksum of the text that is saved to the PDB.
/// </param>
/// <param name="throwIfBinaryDetected">When true, an <see cref="InvalidDataException"/> is thrown if the decoded
/// text contains at least two consecutive NUL characters.</param>
/// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="stream"/> doesn't support reading or seeking.
/// <paramref name="checksumAlgorithm"/> is not supported.
/// </exception>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
/// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
/// <exception cref="IOException">An I/O error occurs.</exception>
/// <remarks>Reading starts at the beginning of the stream. The stream is left open.</remarks>
public static SourceText From(
    Stream stream,
    Encoding encoding = null,
    SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
    bool throwIfBinaryDetected = false,
    bool canBeEmbedded = false)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    bool readableAndSeekable = stream.CanRead && stream.CanSeek;
    if (!readableAndSeekable)
    {
        throw new ArgumentException(CodeAnalysisResources.StreamMustSupportReadAndSeek, nameof(stream));
    }

    ValidateChecksumAlgorithm(checksumAlgorithm);

    if (encoding == null)
    {
        encoding = s_utf8EncodingWithNoBOM;
    }

    // Text that would land on the large object heap as a single string is handed to LargeText instead.
    int maxCharCount = encoding.GetMaxCharCountOrThrowIfHuge(stream);
    if (maxCharCount >= LargeObjectHeapLimitInChars)
    {
        return LargeText.Decode(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
    }

    Encoding actualEncoding;
    string decoded = Decode(stream, encoding, out actualEncoding);

    if (throwIfBinaryDetected && IsBinary(decoded))
    {
        throw new InvalidDataException();
    }

    // The checksum and the embedded blob have to come from the original bytes:
    // re-encoding the decoded text is not guaranteed to round-trip.
    var checksum = CalculateChecksum(stream, checksumAlgorithm);

    ImmutableArray<byte> blob;
    if (canBeEmbedded)
    {
        blob = EmbeddedText.CreateBlob(stream);
    }
    else
    {
        blob = default(ImmutableArray<byte>);
    }

    return new StringText(decoded, actualEncoding, checksum, checksumAlgorithm, blob);
}
/// <summary>
/// Attempts to build a <see cref="SourceText"/> from <paramref name="data"/> using the supplied encoding.
/// </summary>
/// <param name="data">Input stream holding the encoded text. The stream will not be closed.</param>
/// <param name="encoding">Expected encoding of the stream. A detected byte order mark may cause a different encoding to be used.</param>
/// <param name="checksumAlgorithm">Checksum algorithm to use.</param>
/// <param name="throwIfBinaryDetected">Throw <see cref="InvalidDataException"/> if binary (non-text) data is detected.</param>
/// <param name="canBeEmbedded">Whether the text can be embedded in the PDB.</param>
/// <returns>The <see cref="SourceText"/> decoded from the stream.</returns>
/// <exception cref="DecoderFallbackException">The decoder was unable to decode the stream with the given encoding.</exception>
/// <exception cref="IOException">Error reading from stream.</exception>
/// <remarks>
/// internal for unit testing
/// </remarks>
internal static SourceText Decode(
    Stream data,
    Encoding encoding,
    SourceHashAlgorithm checksumAlgorithm,
    bool throwIfBinaryDetected = false,
    bool canBeEmbedded = false)
{
    Debug.Assert(data != null);
    Debug.Assert(encoding != null);

    data.Seek(0, SeekOrigin.Begin);

    // Only small streams are candidates for decoding straight from their byte buffer.
    bool isSmall = encoding.GetMaxCharCountOrThrowIfHuge(data) < LargeObjectHeapLimitInChars;
    byte[] bytes = isSmall ? TryGetByteArrayFromStream(data) : null;

    if (bytes == null)
    {
        // Large stream, or the byte buffer is not available: decode through the stream.
        return SourceText.From(data, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
    }

    return SourceText.From(bytes, (int)data.Length, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
}