internal static SourceText Decode(Stream stream,
                                          Encoding encoding,
                                          SourceHashAlgorithm checksumAlgorithm,
                                          bool throwIfBinaryDetected,
                                          bool canBeEmbedded)
        {
            stream.Seek(0, SeekOrigin.Begin);
            long longLength = stream.Length;

            if (longLength == 0)
            {
                return(From(string.Empty, encoding, checksumAlgorithm));
            }

            var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);

            Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
            int length = (int)longLength;

            using (var reader = new StreamReader(stream, encoding, true, Math.Min(length, 4096), true))
            {
                var chunks = ReadChunksFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected);

                // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
                // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
                var checksum = CalculateChecksum(stream, checksumAlgorithm);
                //var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default;
                return(new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, default));
            }
        }
Example #2
0
        internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
        {
            stream.Seek(0, SeekOrigin.Begin);

            long longLength = stream.Length;
            if (longLength == 0)
            {
                return SourceText.From(string.Empty, encoding, checksumAlgorithm);
            }

            var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);
            Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
            int length = (int)longLength;

            using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
            {
                var chunks = ReadChunksFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected);

                // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
                // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
                var checksum = CalculateChecksum(stream, checksumAlgorithm);
                var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
                return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
            }
        }
Example #3
0
        internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
        {
            stream.Seek(0, SeekOrigin.Begin);

            long longLength = stream.Length;
            if (longLength == 0)
            {
                return SourceText.From(string.Empty, encoding, checksumAlgorithm);
            }

            var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);
            Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
            int length = (int)longLength;

            using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
            {
                ArrayBuilder<char[]> chunks = ArrayBuilder<char[]>.GetInstance(1 + maxCharRemainingGuess / ChunkSize);
                while (!reader.EndOfStream)
                {
                    var nextChunkSize = ChunkSize;
                    if (maxCharRemainingGuess < ChunkSize)
                    {
                        // maxCharRemainingGuess typically overestimates a little
                        // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk
                        // and then use 64 char tail, which is likley to be resized.
                        nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64);
                    }

                    char[] chunk = new char[nextChunkSize];

                    int charsRead = reader.ReadBlock(chunk, 0, chunk.Length);
                    if (charsRead == 0)
                    {
                        break;
                    }

                    maxCharRemainingGuess -= charsRead;

                    if (charsRead < chunk.Length)
                    {
                        Array.Resize(ref chunk, charsRead);
                    }

                    // Check for binary files
                    if (throwIfBinaryDetected && IsBinary(chunk))
                    {
                        throw new InvalidDataException();
                    }

                    chunks.Add(chunk);
                }

                // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
                // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
                var checksum = CalculateChecksum(stream, checksumAlgorithm);
                var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
                return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
            }
        }
        /// <summary>
        /// Constructs a <see cref="SourceText"/> from stream content.
        /// </summary>
        /// <param name="stream">Stream. The stream must be seekable.</param>
        /// <param name="encoding">
        /// Data encoding to use if the stream doesn't start with Byte Order Mark specifying the encoding.
        /// <see cref="Encoding.UTF8"/> if not specified.
        /// </param>
        /// <param name="checksumAlgorithm">
        /// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
        /// </param>
        /// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
        /// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
        /// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="stream"/> doesn't support reading or seeking.
        /// <paramref name="checksumAlgorithm"/> is not supported.
        /// </exception>
        /// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
        /// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
        /// <exception cref="IOException">An I/O error occurs.</exception>
        /// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
        public static SourceText From(
            Stream stream,
            Encoding encoding = null,
            SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
            bool throwIfBinaryDetected            = false,
            bool canBeEmbedded = false)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            if (!stream.CanRead || !stream.CanSeek)
            {
                throw new ArgumentException(nameof(stream));
            }

            ValidateChecksumAlgorithm(checksumAlgorithm);
            encoding = encoding ?? s_utf8EncodingWithNoBOM;

            // If the resulting string would end up on the large object heap, then use LargeEncodedText.
            if (encoding.GetMaxCharCountOrThrowIfHuge(stream) >= LargeObjectHeapLimitInChars)
            {
                return(LargeText.Decode(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded));
            }

            string text = Decode(stream, encoding, out encoding);

            if (throwIfBinaryDetected && IsBinary(text))
            {
                throw new InvalidDataException();
            }

            // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
            // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
            var checksum = CalculateChecksum(stream, checksumAlgorithm);

            //*ar embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default;*/
            return(new StringText(text, encoding, checksum, checksumAlgorithm, default));
        }
Example #5
0
        /// <summary>
        /// Constructs a <see cref="SourceText"/> from stream content.
        /// </summary>
        /// <param name="stream">Stream. The stream must be seekable.</param>
        /// <param name="encoding">
        /// Data encoding to use if the stream doesn't start with Byte Order Mark specifying the encoding.
        /// <see cref="Encoding.UTF8"/> if not specified.
        /// </param>
        /// <param name="checksumAlgorithm">
        /// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
        /// </param>
        /// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
        /// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
        /// <param name="canBeEmbedded">True if the text can be passed to <see cref="EmbeddedText.FromSource"/> and be embedded in a PDB.</param>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="stream"/> doesn't support reading or seeking.
        /// <paramref name="checksumAlgorithm"/> is not supported.
        /// </exception>
        /// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
        /// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
        /// <exception cref="IOException">An I/O error occurs.</exception>
        /// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
        public static SourceText From(
            Stream stream,
            Encoding encoding = null,
            SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1,
            bool throwIfBinaryDetected = false,
            bool canBeEmbedded = false)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            if (!stream.CanRead || !stream.CanSeek)
            {
                throw new ArgumentException(CodeAnalysisResources.StreamMustSupportReadAndSeek, nameof(stream));
            }

            ValidateChecksumAlgorithm(checksumAlgorithm);

            encoding = encoding ?? s_utf8EncodingWithNoBOM;

            // If the resulting string would end up on the large object heap, then use LargeEncodedText.
            if (encoding.GetMaxCharCountOrThrowIfHuge(stream) >= LargeObjectHeapLimitInChars)
            {
                return LargeText.Decode(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
            }

            string text = Decode(stream, encoding, out encoding);
            if (throwIfBinaryDetected && IsBinary(text))
            {
                throw new InvalidDataException();
            }

            // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
            // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
            var checksum = CalculateChecksum(stream, checksumAlgorithm);
            var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
            return new StringText(text, encoding, checksum, checksumAlgorithm, embeddedTextBlob);
        }
Example #6
0
        /// <summary>
        /// Try to create a <see cref="SourceText"/> from the given stream using the given encoding.
        /// </summary>
        /// <param name="data">The input stream containing the encoded text. The stream will not be closed.</param>
        /// <param name="encoding">The expected encoding of the stream. The actual encoding used may be different if byte order marks are detected.</param>
        /// <param name="checksumAlgorithm">The checksum algorithm to use.</param>
        /// <param name="throwIfBinaryDetected">Throw <see cref="InvalidDataException"/> if binary (non-text) data is detected.</param>
        /// <param name="canBeEmbedded">Indicates if the text can be embedded in the PDB.</param>
        /// <returns>The <see cref="SourceText"/> decoded from the stream.</returns>
        /// <exception cref="DecoderFallbackException">The decoder was unable to decode the stream with the given encoding.</exception>
        /// <exception cref="IOException">Error reading from stream.</exception> 
        /// <remarks>
        /// internal for unit testing
        /// </remarks>
        internal static SourceText Decode(
            Stream data,
            Encoding encoding,
            SourceHashAlgorithm checksumAlgorithm, 
            bool throwIfBinaryDetected = false,
            bool canBeEmbedded = false)
        {
            Debug.Assert(data != null);
            Debug.Assert(encoding != null);

            data.Seek(0, SeekOrigin.Begin);

            // For small streams, see if we can read the byte buffer directly.
            if (encoding.GetMaxCharCountOrThrowIfHuge(data) < LargeObjectHeapLimitInChars)
            {
                byte[] buffer = TryGetByteArrayFromStream(data);
                if (buffer != null)
                {
                    return SourceText.From(buffer, (int)data.Length, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
                }
            }

            return SourceText.From(data, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
        }