// Decodes a seekable stream into a LargeText. The checksum (and, when requested, the
// embedded-text blob) is computed from the original bytes while they are still in hand,
// because the encoding is not guaranteed to round-trip on re-encode.
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    stream.Seek(0, SeekOrigin.Begin);

    long longLength = stream.Length;
    if (longLength == 0)
    {
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    // Upper bound on the decoded character count; throws for streams too large to decode.
    var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);
    Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
    int length = (int)longLength;

    // leaveOpen: true — the stream is read again below for the checksum/blob.
    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
    {
        var chunks = ReadChunksFromTextReader(reader, maxCharRemainingGuess, throwIfBinaryDetected);

        // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
        // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
        var checksum = CalculateChecksum(stream, checksumAlgorithm);
        var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
        return new LargeText(chunks, reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
    }
}
/// <summary>
/// Initializes the instance from a checksum, its algorithm id, and an optional embedded-text
/// blob, delegating to the GUID-based constructor via
/// <see cref="DebugSourceDocument.GetAlgorithmGuid"/>.
/// </summary>
public DebugSourceInfo(
    ImmutableArray<byte> checksum,
    SourceHashAlgorithm checksumAlgorithm,
    ImmutableArray<byte> embeddedTextBlob = default(ImmutableArray<byte>))
    : this(checksum, DebugSourceDocument.GetAlgorithmGuid(checksumAlgorithm), embeddedTextBlob)
{
}
// Writer that accumulates text in LargeText.ChunkSize pieces. "length" is the expected total
// character count; it presizes the chunk list and caps the first buffer's size.
public LargeTextWriter(Encoding encoding, SourceHashAlgorithm checksumAlgorithm, int length)
{
    _encoding = encoding;
    _checksumAlgorithm = checksumAlgorithm;
    _chunks = ArrayBuilder<char[]>.GetInstance(1 + length / LargeText.ChunkSize);
    _bufferSize = Math.Min(LargeText.ChunkSize, length);
}
// internal for testing
// Decodes the stream. When no default encoding is given, UTF-8 is attempted first; if that
// fails to decode, the encoding produced by getEncoding is used instead. Binary detection is
// only applied when the encoding was auto-detected.
internal static SourceText Create(Stream stream, Func<Encoding> getEncoding, Encoding defaultEncoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    Debug.Assert(stream != null);
    Debug.Assert(stream.CanRead && stream.CanSeek);

    bool detectEncoding = defaultEncoding == null;
    if (detectEncoding)
    {
        try
        {
            return Decode(stream, s_utf8Encoding, checksumAlgorithm, throwIfBinaryDetected: false);
        }
        catch (DecoderFallbackException)
        {
            // Not valid UTF-8: fall through and retry with the fallback encoding below.
        }
    }

    try
    {
        return Decode(stream, defaultEncoding ?? getEncoding(), checksumAlgorithm, throwIfBinaryDetected: detectEncoding);
    }
    catch (DecoderFallbackException e)
    {
        // Surface the decoding failure as InvalidDataException so callers treat the
        // content as unreadable rather than as a programming error.
        throw new InvalidDataException(e.Message);
    }
}
/// <summary>
/// Throws <see cref="ArgumentException"/> when the given checksum algorithm is not one the
/// debug-document infrastructure supports; returns normally otherwise.
/// </summary>
internal static void ValidateChecksumAlgorithm(SourceHashAlgorithm checksumAlgorithm)
{
    if (Cci.DebugSourceDocument.IsSupportedAlgorithm(checksumAlgorithm))
    {
        return;
    }

    throw new ArgumentException(CodeAnalysisResources.UnsupportedHashAlgorithm, nameof(checksumAlgorithm));
}
// Decodes a seekable stream into a LargeText. Text is read in ChunkSize pieces so no single
// char[] lands on the large object heap. The checksum (and optional embedded-text blob) is
// computed from the original bytes because the encoding may not round-trip on re-encode.
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    stream.Seek(0, SeekOrigin.Begin);

    long longLength = stream.Length;
    if (longLength == 0)
    {
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    // Upper bound on the decoded character count; throws for streams too large to decode.
    var maxCharRemainingGuess = encoding.GetMaxCharCountOrThrowIfHuge(stream);
    Debug.Assert(longLength > 0 && longLength <= int.MaxValue); // GetMaxCharCountOrThrowIfHuge should have thrown.
    int length = (int)longLength;

    // leaveOpen: true — the stream is re-read below for the checksum/blob.
    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
    {
        ArrayBuilder<char[]> chunks = ArrayBuilder<char[]>.GetInstance(1 + maxCharRemainingGuess / ChunkSize);

        while (!reader.EndOfStream)
        {
            var nextChunkSize = ChunkSize;
            if (maxCharRemainingGuess < ChunkSize)
            {
                // maxCharRemainingGuess typically overestimates a little
                // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk
                // and then use 64 char tail, which is likely to be resized.
                nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64);
            }

            char[] chunk = new char[nextChunkSize];

            int charsRead = reader.ReadBlock(chunk, 0, chunk.Length);
            if (charsRead == 0)
            {
                break;
            }

            maxCharRemainingGuess -= charsRead;

            // Trim the final, partially-filled chunk to the characters actually read.
            if (charsRead < chunk.Length)
            {
                Array.Resize(ref chunk, charsRead);
            }

            // Check for binary files
            if (throwIfBinaryDetected && IsBinary(chunk))
            {
                throw new InvalidDataException();
            }

            chunks.Add(chunk);
        }

        // We must compute the checksum and embedded text blob now while we still have the original bytes in hand.
        // We cannot re-encode to obtain checksum and blob as the encoding is not guaranteed to round-trip.
        var checksum = CalculateChecksum(stream, checksumAlgorithm);
        var embeddedTextBlob = canBeEmbedded ? EmbeddedText.CreateBlob(stream) : default(ImmutableArray<byte>);
        return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm, embeddedTextBlob);
    }
}
// SourceText backed by a single string. encodingOpt may be null when the text did not
// originate from (and is not destined for) an encoded file.
internal StringText(string source, Encoding encodingOpt, ImmutableArray<byte> checksum = default(ImmutableArray<byte>), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
    : base(checksum, checksumAlgorithm)
{
    Debug.Assert(source != null);

    _source = source;
    _encodingOpt = encodingOpt;
}
// SourceText view over a live StringBuilder. encodingOpt may be null when the text is not
// associated with an encoded file.
public StringBuilderText(StringBuilder builder, Encoding encodingOpt, SourceHashAlgorithm checksumAlgorithm)
    : base(checksumAlgorithm: checksumAlgorithm)
{
    Debug.Assert(builder != null);

    _builder = builder;
    _encodingOpt = encodingOpt;
}
/// <summary>
/// Initializes an instance of <see cref="SourceText"/> from the provided stream. This version differs
/// from <see cref="SourceText.From(Stream, Encoding, SourceHashAlgorithm, bool)"/> in two ways:
/// 1. It attempts to minimize allocations by trying to read the stream into a byte array.
/// 2. If <paramref name="defaultEncoding"/> is null, it will first try UTF8 and, if that fails, it will
///    try CodePage 1252. If CodePage 1252 is not available on the system, then it will try Latin1.
/// </summary>
/// <param name="stream">The stream containing encoded text.</param>
/// <param name="defaultEncoding">
/// Specifies an encoding to be used if the actual encoding can't be determined from the stream content (the stream doesn't start with Byte Order Mark).
/// If not specified auto-detect heuristics are used to determine the encoding. If these heuristics fail the decoding is assumed to be Encoding.Default.
/// Note that if the stream starts with Byte Order Mark the value of <paramref name="defaultEncoding"/> is ignored.
/// </param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate document checksum.</param>
/// <exception cref="InvalidDataException">
/// The stream content can't be decoded using the specified <paramref name="defaultEncoding"/>, or
/// <paramref name="defaultEncoding"/> is null and the stream appears to be a binary file.
/// </exception>
/// <exception cref="IOException">An IO error occurred while reading from the stream.</exception>
internal static SourceText Create(Stream stream, Encoding defaultEncoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    // Delegates to the Func-based overload; the lambda defers creation of the fallback encoding.
    return Create(stream,
        () => s_fallbackEncoding,
        defaultEncoding: defaultEncoding,
        checksumAlgorithm: checksumAlgorithm);
}
/// <summary>
/// Constructs a <see cref="SourceText"/> from text in a string.
/// </summary>
/// <param name="text">Text.</param>
/// <param name="encoding">
/// Encoding of the file that the <paramref name="text"/> was read from or is going to be saved to.
/// <c>null</c> if the encoding is unspecified.
/// If the encoding is not specified the resulting <see cref="SourceText"/> isn't debuggable.
/// If an encoding-less <see cref="SourceText"/> is written to a file a <see cref="Encoding.UTF8"/> shall be used as a default.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
public static SourceText From(string text, Encoding encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    if (text is null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    return new StringText(text, encoding, checksumAlgorithm: checksumAlgorithm);
}
// Private constructor: all public factories funnel through here after validating, so only
// asserts remain. blob must contain at least the 4-byte header (see the sizeof(int) assert).
private EmbeddedText(string filePath, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray<byte> blob)
{
    Debug.Assert(filePath?.Length > 0);
    Debug.Assert(Cci.DebugSourceDocument.IsSupportedAlgorithm(checksumAlgorithm));
    Debug.Assert(!blob.IsDefault && blob.Length >= sizeof(int));

    FilePath = filePath;
    Checksum = checksum;
    ChecksumAlgorithm = checksumAlgorithm;
    Blob = blob;
}
// Decodes a seekable stream into a LargeText in ChunkSize pieces, keeping individual
// char[] allocations off the large object heap.
// NOTE(review): the (int) cast below truncates for streams larger than int.MaxValue bytes —
// presumably callers guarantee the stream fits in an int; confirm (the sibling overload
// guards via GetMaxCharCountOrThrowIfHuge).
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
{
    stream.Seek(0, SeekOrigin.Begin);

    int length = (int)stream.Length;
    if (length == 0)
    {
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    // Upper bound on the number of characters the bytes could decode to.
    var maxCharRemainingGuess = encoding.GetMaxCharCount(length);

    // leaveOpen: true — the stream is re-read below for the checksum.
    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
    {
        ArrayBuilder<char[]> chunks = ArrayBuilder<char[]>.GetInstance(1 + maxCharRemainingGuess / ChunkSize);

        while (!reader.EndOfStream)
        {
            var nextChunkSize = ChunkSize;
            if (maxCharRemainingGuess < ChunkSize)
            {
                // maxCharRemainingGuess typically overestimates a little
                // so we will first fill a slightly smaller (maxCharRemainingGuess - 64) chunk
                // and then use 64 char tail, which is likely to be resized.
                nextChunkSize = Math.Max(maxCharRemainingGuess - 64, 64);
            }

            char[] chunk = new char[nextChunkSize];

            int charsRead = reader.ReadBlock(chunk, 0, chunk.Length);
            if (charsRead == 0)
            {
                break;
            }

            maxCharRemainingGuess -= charsRead;

            // Trim the final, partially-filled chunk to the characters actually read.
            if (charsRead < chunk.Length)
            {
                Array.Resize(ref chunk, charsRead);
            }

            // Check for binary files
            if (throwIfBinaryDetected && IsBinary(chunk))
            {
                throw new InvalidDataException();
            }

            chunks.Add(chunk);
        }

        var checksum = CalculateChecksum(stream, checksumAlgorithm);
        return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm);
    }
}
/// <summary>
/// Initializes an instance of <see cref="SourceText"/> from the provided stream. This version differs
/// from <see cref="SourceText.From(Stream, Encoding, SourceHashAlgorithm, bool)"/> in two ways:
/// 1. It attempts to minimize allocations by trying to read the stream into a byte array.
/// 2. If <paramref name="defaultEncoding"/> is null, it will first try UTF8 and, if that fails, it will
///    try CodePage 1252. If CodePage 1252 is not available on the system, then it will try Latin1.
/// </summary>
/// <param name="stream">The stream containing encoded text.</param>
/// <param name="defaultEncoding">
/// Specifies an encoding to be used if the actual encoding can't be determined from the stream content (the stream doesn't start with Byte Order Mark).
/// If not specified auto-detect heuristics are used to determine the encoding. If these heuristics fail the decoding is assumed to be Encoding.Default.
/// Note that if the stream starts with Byte Order Mark the value of <paramref name="defaultEncoding"/> is ignored.
/// </param>
/// <param name="canBeEmbedded">Indicates if the file can be embedded in the PDB.</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate document checksum.</param>
/// <exception cref="InvalidDataException">
/// The stream content can't be decoded using the specified <paramref name="defaultEncoding"/>, or
/// <paramref name="defaultEncoding"/> is null and the stream appears to be a binary file.
/// </exception>
/// <exception cref="IOException">An IO error occurred while reading from the stream.</exception>
internal static SourceText Create(Stream stream, Encoding defaultEncoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, bool canBeEmbedded = false)
{
    // Delegates to the overload taking the fallback encoding explicitly.
    return Create(stream,
        s_fallbackEncoding,
        defaultEncoding: defaultEncoding,
        checksumAlgorithm: checksumAlgorithm,
        canBeEmbedded: canBeEmbedded);
}
/// <summary>
/// Chooses a writer implementation by expected length: texts below the large-object-heap
/// threshold accumulate in a <see cref="StringTextWriter"/>, larger ones in a chunked
/// <see cref="LargeTextWriter"/>.
/// </summary>
public static SourceTextWriter Create(Encoding encoding, SourceHashAlgorithm checksumAlgorithm, int length)
{
    if (length >= SourceText.LargeObjectHeapLimitInChars)
    {
        return new LargeTextWriter(encoding, checksumAlgorithm, length);
    }

    return new StringTextWriter(encoding, checksumAlgorithm, length);
}
// Test helper: wraps the payload in a deliberately oversized backing array so we verify that
// the implementation only reads the region covered by the stream, not the whole array.
private static SourceText CreateMemoryStreamBasedEncodedText(byte[] bytes, Encoding readEncodingOpt, SourceHashAlgorithm algorithm = SourceHashAlgorithm.Sha1)
{
    var backing = new byte[bytes.Length + 10];
    bytes.CopyTo(backing, 0);

    using (var stream = new MemoryStream(backing, 0, bytes.Length, writable: true, publiclyVisible: true))
    {
        return EncodedStringText.Create(stream, readEncodingOpt, algorithm);
    }
}
// Decodes "length" characters from an already-open text reader into a LargeText.
// Binary detection is skipped: the caller handed us a text reader, so the content
// is already known to be text.
internal static SourceText Decode(TextReader reader, int length, Encoding encodingOpt, SourceHashAlgorithm checksumAlgorithm)
{
    if (length == 0)
    {
        return SourceText.From(string.Empty, encodingOpt, checksumAlgorithm);
    }

    var textChunks = ReadChunksFromTextReader(reader, length, throwIfBinaryDetected: false);

    return new LargeText(textChunks, encodingOpt, checksumAlgorithm);
}
// Byte threshold above which allocations would land on the large object heap.
private const int LargeObjectHeapLimit = 80 * 1024; // 80KB

// SourceText over an already-decoded string plus the encoding it was decoded with.
// When throwIfBinary is set, content that looks binary is rejected with InvalidDataException.
private EncodedStringText(string source, Encoding encoding, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinary)
    : base(checksum: checksum, checksumAlgorithm: checksumAlgorithm)
{
    if (throwIfBinary && IsBinary(source))
    {
        throw new InvalidDataException();
    }

    Debug.Assert(source != null);
    Debug.Assert(encoding != null);
    _source = source;
    _encoding = encoding;
}
// Base constructor: validates the checksum algorithm and, when a precomputed checksum is
// supplied, verifies its length matches the algorithm's hash size.
protected SourceText(ImmutableArray<byte> checksum = default(ImmutableArray<byte>), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, SourceTextContainer container = null)
{
    ValidateChecksumAlgorithm(checksumAlgorithm);

    if (!checksum.IsDefault && checksum.Length != CryptographicHashProvider.GetHashSize(checksumAlgorithm))
    {
        throw new ArgumentException(CodeAnalysisResources.InvalidHash, nameof(checksum));
    }

    _checksumAlgorithm = checksumAlgorithm;
    _lazyChecksum = checksum;
    _lazyContainer = container;
}
/// <summary>
/// Maps a <see cref="SourceHashAlgorithm"/> id to a freshly created
/// <see cref="HashAlgorithm"/> instance, or <c>null</c> when the id is unsupported.
/// The caller owns (and must dispose) the returned instance.
/// </summary>
internal static HashAlgorithm TryGetAlgorithm(SourceHashAlgorithm algorithmId)
{
    return algorithmId switch
    {
        SourceHashAlgorithm.Sha1 => (HashAlgorithm)SHA1.Create(),
        SourceHashAlgorithm.Sha256 => SHA256.Create(),
        _ => null,
    };
}
/// <summary>
/// Returns the hash size in bytes for the given algorithm id
/// (SHA-1: 160 bits, SHA-256: 256 bits); throws for unknown ids.
/// </summary>
internal static int GetHashSize(SourceHashAlgorithm algorithmId)
    => algorithmId switch
    {
        SourceHashAlgorithm.Sha1 => 160 / 8,
        SourceHashAlgorithm.Sha256 => 256 / 8,
        _ => throw ExceptionUtilities.UnexpectedValue(algorithmId),
    };
/// <summary>
/// Maps a <see cref="SourceHashAlgorithm"/> id to a freshly created
/// <see cref="HashAlgorithm"/> instance, or <c>null</c> when the id is unsupported.
/// The caller owns (and must dispose) the returned instance.
/// </summary>
/// <remarks>
/// Uses the <see cref="SHA1.Create()"/>/<see cref="SHA256.Create()"/> factory methods rather
/// than the legacy <c>SHA1CryptoServiceProvider</c>/<c>SHA256CryptoServiceProvider</c> types:
/// the *CryptoServiceProvider types are Windows-specific and obsolete (SYSLIB0021), while the
/// factories return the best available platform implementation. This also matches the other
/// TryGetAlgorithm overload in this codebase.
/// </remarks>
internal static HashAlgorithm TryGetAlgorithm(SourceHashAlgorithm algorithmId)
{
    switch (algorithmId)
    {
        case SourceHashAlgorithm.Sha1:
            return SHA1.Create();

        case SourceHashAlgorithm.Sha256:
            return SHA256.Create();

        default:
            return null;
    }
}
// Syntax tree over an already-parsed root. textOpt may be null (the text field is lazy);
// when provided, its encoding and checksum algorithm must agree with the arguments
// (see the assert below).
internal ParsedSyntaxTree(SourceText textOpt, Encoding encodingOpt, SourceHashAlgorithm checksumAlgorithm, string path, CSharpParseOptions options, CSharpSyntaxNode root, Syntax.InternalSyntax.DirectiveStack directives, bool cloneRoot = true)
{
    Debug.Assert(root != null);
    Debug.Assert(options != null);
    Debug.Assert(textOpt == null || textOpt.Encoding == encodingOpt && textOpt.ChecksumAlgorithm == checksumAlgorithm);

    _lazyText = textOpt;
    // Fall back to the text's own encoding when none was passed explicitly.
    _encodingOpt = encodingOpt ?? textOpt?.Encoding;
    _checksumAlgorithm = checksumAlgorithm;
    _options = options;
    _path = path ?? string.Empty;
    // cloneRoot presumably detaches the node from any prior tree so this tree owns it — confirm
    // against CloneNodeAsRoot's contract.
    _root = cloneRoot ? this.CloneNodeAsRoot(root) : root;
    _hasCompilationUnitRoot = root.Kind() == SyntaxKind.CompilationUnit;

    this.SetDirectiveStack(directives);
}
/// <summary>
/// Returns true when the algorithm is one the debugger can consume.
/// </summary>
internal static bool IsSupportedAlgorithm(SourceHashAlgorithm algorithm)
{
    // Dev12 debugger supports MD5, SHA1.
    // Dev14 debugger supports MD5, SHA1, SHA256.
    // MD5 is obsolete.
    return algorithm switch
    {
        SourceHashAlgorithm.Sha1 => true,
        SourceHashAlgorithm.Sha256 => true,
        _ => false,
    };
}
// Text stored as a sequence of char[] chunks, with a precomputed starting offset per chunk
// so absolute positions can be mapped to a chunk. The running total of chunk lengths is the
// overall text length.
internal LargeText(ImmutableArray<char[]> chunks, Encoding encoding, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray<byte> embeddedTextBlob)
    : base(checksum, checksumAlgorithm, embeddedTextBlob)
{
    _chunks = chunks;
    _encoding = encoding;

    var startOffsets = new int[chunks.Length];
    int runningOffset = 0;
    for (int index = 0; index < startOffsets.Length; index++)
    {
        startOffsets[index] = runningOffset;
        runningOffset += chunks[index].Length;
    }

    _chunkStartOffsets = startOffsets;
    _length = runningOffset;
}
// Constructor used when a checksum (and possibly an embedded-text blob) was precomputed
// from original bytes.
internal SourceText(ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, ImmutableArray<byte> embeddedTextBlob)
    : this(checksum, checksumAlgorithm, container: null)
{
    // We should never have precomputed the embedded text blob without precomputing the checksum.
    Debug.Assert(embeddedTextBlob.IsDefault || !checksum.IsDefault);

    if (!checksum.IsDefault && embeddedTextBlob.IsDefault)
    {
        // We can't compute the embedded text blob lazily if we're given a precomputed checksum.
        // This happens when source bytes/stream were given, but canBeEmbedded=true was not passed.
        // Empty (as opposed to default) presumably marks the blob as deliberately unavailable
        // rather than not-yet-computed — confirm against the blob's consumers.
        _precomputedEmbeddedTextBlob = ImmutableArray<byte>.Empty;
    }
    else
    {
        _precomputedEmbeddedTextBlob = embeddedTextBlob;
    }
}
// Composite over consecutive SourceText segments; precomputes each segment's starting
// offset so absolute positions can be mapped to the owning segment.
private CompositeText(ImmutableArray<SourceText> segments, Encoding encoding, SourceHashAlgorithm checksumAlgorithm)
    : base(checksumAlgorithm: checksumAlgorithm)
{
    Debug.Assert(!segments.IsDefaultOrEmpty);

    _segments = segments;
    _encoding = encoding;

    ComputeLengthAndStorageSize(segments, out _length, out _storageSize);

    _segmentOffsets = new int[segments.Length];
    int offset = 0;
    for (int i = 0; i < _segmentOffsets.Length; i++)
    {
        _segmentOffsets[i] = offset;
        offset += _segments[i].Length;
    }
}
// Decodes a seekable stream into a LargeText in fixed ChunkSize pieces, keeping individual
// char[] allocations off the large object heap.
// NOTE(review): the (int) cast truncates for streams larger than int.MaxValue bytes —
// presumably callers guarantee the stream fits in an int; confirm.
internal static SourceText Decode(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
{
    stream.Seek(0, SeekOrigin.Begin);

    int length = (int)stream.Length;
    if (length == 0)
    {
        return SourceText.From(string.Empty, encoding, checksumAlgorithm);
    }

    // leaveOpen: true — the stream is re-read below for the checksum.
    using (var reader = new StreamReader(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: Math.Min(length, 4096), leaveOpen: true))
    {
        ArrayBuilder<char[]> chunks = ArrayBuilder<char[]>.GetInstance(1 + length / ChunkSize);
        while (!reader.EndOfStream)
        {
            char[] chunk = new char[ChunkSize];

            int charsRead = reader.ReadBlock(chunk, 0, ChunkSize);
            if (charsRead == 0)
            {
                break;
            }

            // Trim the final, partially-filled chunk to the characters actually read.
            if (charsRead < ChunkSize)
            {
                Array.Resize(ref chunk, charsRead);
            }

            // Check for binary files
            if (throwIfBinaryDetected && IsBinary(chunk))
            {
                throw new InvalidDataException();
            }

            chunks.Add(chunk);
        }

        var checksum = CalculateChecksum(stream, checksumAlgorithm);
        return new LargeText(chunks.ToImmutableAndFree(), reader.CurrentEncoding, checksum, checksumAlgorithm);
    }
}
/// <summary>
/// Returns the GUID the PDB/debugger format uses to identify the given hash algorithm;
/// throws for unsupported values (callers must check <c>IsSupportedAlgorithm</c> first).
/// </summary>
internal static Guid GetAlgorithmGuid(SourceHashAlgorithm algorithm)
{
    Debug.Assert(IsSupportedAlgorithm(algorithm));

    // Dev12 debugger supports MD5, SHA1.
    // Dev14 debugger supports MD5, SHA1, SHA256.
    // MD5 is obsolete.
    unchecked
    {
        return algorithm switch
        {
            SourceHashAlgorithm.Sha1 => new Guid((int)0xff1816ec, (short)0xaa5e, 0x4d10, 0x87, 0xf7, 0x6f, 0x49, 0x63, 0x83, 0x34, 0x60),
            SourceHashAlgorithm.Sha256 => new Guid((int)0x8829d00f, 0x11b8, 0x4213, 0x87, 0x8b, 0x77, 0x0e, 0x85, 0x97, 0xac, 0x16),
            _ => throw ExceptionUtilities.UnexpectedValue(algorithm),
        };
    }
}
// Non-throwing variant of GetAlgorithmGuid: maps a supported algorithm to the GUID the
// PDB/debugger format uses for it; returns false (guid = default) for anything else.
internal static bool TryGetAlgorithmGuid(SourceHashAlgorithm algorithm, out Guid guid)
{
    // Dev12 debugger supports MD5, SHA1.
    // Dev14 debugger supports MD5, SHA1, SHA256.
    // MD5 is obsolete.
    unchecked
    {
        switch (algorithm)
        {
            case SourceHashAlgorithm.Sha1:
                guid = new Guid((int)0xff1816ec, (short)0xaa5e, 0x4d10, 0x87, 0xf7, 0x6f, 0x49, 0x63, 0x83, 0x34, 0x60);
                return true;

            case SourceHashAlgorithm.Sha256:
                guid = new Guid((int)0x8829d00f, 0x11b8, 0x4213, 0x87, 0x8b, 0x77, 0x0e, 0x85, 0x97, 0xac, 0x16);
                return true;

            default:
                guid = default(Guid);
                return false;
        }
    }
}
/// <summary>
/// Reads content of a source file.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics.</param>
/// <param name="encoding">Encoding to use or 'null' for autodetect/default</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
/// <param name="normalizedFilePath">If given <paramref name="file"/> opens successfully, set to normalized absolute path of the file, null otherwise.</param>
/// <returns>File content or null on failure.</returns>
internal SourceText ReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, out string normalizedFilePath)
{
    try
    {
        // FileShare.ReadWrite: do not block other processes (e.g. editors) that have the file open.
        using (var data = new FileStream(file.Path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            // FileStream.Name yields the fully-qualified path of the opened file.
            normalizedFilePath = data.Name;
            return(EncodedStringText.Create(data, encoding, checksumAlgorithm));
        }
    }
    catch (Exception e)
    {
        // Report the failure as a diagnostic instead of propagating the exception.
        diagnostics.Add(ToFileReadDiagnostics(e, file));
        normalizedFilePath = null;
        return(null);
    }
}
public static bool IsSupportedAlgorithm(SourceHashAlgorithm algorithm) => algorithm switch {
// Builds a SourceText by routing the whole string through a LargeTextWriter sized to the
// input length.
private static SourceText FromLargeTextWriter(string source, Encoding encoding, SourceHashAlgorithm checksumAlgorithm)
{
    using var writer = new LargeTextWriter(encoding, checksumAlgorithm, source.Length);

    writer.Write(source);
    return writer.ToSourceText();
}
// Intentionally a no-op in this build: unlike the full validation elsewhere, this variant
// accepts every checksum algorithm.
internal static void ValidateChecksumAlgorithm(SourceHashAlgorithm checksumAlgorithm)
{
    // NOTE: For RoslynP all checksum algorithms are valid
}
// Test helper: encodes the text (with preamble/BOM) and wraps it in a deliberately oversized
// backing array so we verify the implementation only reads the region covered by the stream,
// not the whole array.
private static EncodedStringText CreateMemoryStreamBasedEncodedText(string text, Encoding writeEncoding, Encoding readEncodingOpt, SourceHashAlgorithm algorithm = SourceHashAlgorithm.Sha1)
{
    byte[] encodedBytes = writeEncoding.GetBytesWithPreamble(text);

    var backing = new byte[encodedBytes.Length + 10];
    encodedBytes.CopyTo(backing, 0);

    using (var stream = new MemoryStream(backing, 0, encodedBytes.Length, writable: true, publiclyVisible: true))
    {
        return EncodedStringText.Create(stream, readEncodingOpt, algorithm);
    }
}
// Thin forwarder to EncodedStringText.Decode so this type exposes a single Decode entry point.
internal static SourceText Decode(Stream data, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected, bool canBeEmbedded)
{
    return EncodedStringText.Decode(data, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded);
}
// Byte threshold above which allocations would land on the large object heap.
private const int LargeObjectHeapLimit = 80 * 1024; // 80KB

// SourceText over an already-decoded string plus the encoding it was decoded with.
// When throwIfBinary is set, content that looks binary is rejected with InvalidDataException.
private EncodedStringText(string source, Encoding encoding, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinary)
    : base(checksum: checksum, checksumAlgorithm: checksumAlgorithm)
{
    if (throwIfBinary && IsBinary(source))
    {
        throw new InvalidDataException();
    }

    Debug.Assert(source != null);
    Debug.Assert(encoding != null);
    _source = source;
    _encoding = encoding;
}
// Composite over consecutive SourceText segments; precomputes each segment's starting
// offset so absolute positions can be mapped to the owning segment.
private CompositeText(ImmutableArray<SourceText> segments, Encoding? encoding, SourceHashAlgorithm checksumAlgorithm)
    : base(checksumAlgorithm: checksumAlgorithm)
{
    RoslynDebug.Assert(!segments.IsDefaultOrEmpty);

    _segments = segments;
    _encoding = encoding;

    ComputeLengthAndStorageSize(segments, out _length, out _storageSize);

    _segmentOffsets = new int[segments.Length];
    int offset = 0;
    for (int i = 0; i < _segmentOffsets.Length; i++)
    {
        _segmentOffsets[i] = offset;
        offset += _segments[i].Length;
    }
}
// Test helper: encodes the text (with preamble/BOM) and delegates to the byte[]-based overload.
private static SourceText CreateMemoryStreamBasedEncodedText(string text, Encoding writeEncoding, Encoding readEncodingOpt, SourceHashAlgorithm algorithm = SourceHashAlgorithm.Sha1)
{
    var encodedBytes = writeEncoding.GetBytesWithPreamble(text);
    return CreateMemoryStreamBasedEncodedText(encodedBytes, readEncodingOpt, algorithm);
}
/// <summary>
/// Constructs a <see cref="SourceText"/> from a byte array.
/// </summary>
/// <param name="buffer">The encoded source buffer.</param>
/// <param name="length">The number of bytes to read from the buffer.</param>
/// <param name="encoding">
/// Data encoding to use if the encoded buffer doesn't start with Byte Order Mark.
/// <see cref="Encoding.UTF8"/> if not specified.
/// </param>
/// <param name="checksumAlgorithm">
/// Hash algorithm to use to calculate checksum of the text that's saved to PDB.
/// </param>
/// <param name="throwIfBinaryDetected">If the decoded text contains at least two consecutive NUL
/// characters, then an <see cref="InvalidDataException"/> is thrown.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="ArgumentNullException">The <paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The <paramref name="length"/> is negative or longer than the <paramref name="buffer"/>.</exception>
/// <exception cref="ArgumentException"><paramref name="checksumAlgorithm"/> is not supported.</exception>
/// <exception cref="DecoderFallbackException">If the given encoding is set to use a throwing decoder as a fallback</exception>
/// <exception cref="InvalidDataException">Two consecutive NUL characters were detected in the decoded text and <paramref name="throwIfBinaryDetected"/> was true.</exception>
public static SourceText From(byte[] buffer, int length, Encoding encoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, bool throwIfBinaryDetected = false)
{
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    if (length < 0 || length > buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(length));
    }

    ValidateChecksumAlgorithm(checksumAlgorithm);

    // Decode reassigns 'encoding' (via the out parameter) to the encoding actually used.
    string text = Decode(buffer, length, encoding ?? s_utf8EncodingWithNoBOM, out encoding);
    if (throwIfBinaryDetected && IsBinary(text))
    {
        throw new InvalidDataException();
    }

    // Since we have the bytes in hand, it's easy to compute the checksum.
    var checksum = CalculateChecksum(buffer, 0, length, checksumAlgorithm);
    return(new StringText(text, encoding, checksum, checksumAlgorithm));
}
/// <summary>
/// Computes the checksum of the byte range <c>[offset, offset + count)</c> using the
/// hash algorithm identified by <paramref name="algorithmId"/>.
/// </summary>
internal static ImmutableArray<byte> CalculateChecksum(byte[] buffer, int offset, int count, SourceHashAlgorithm algorithmId)
{
    // Callers are expected to pass a supported algorithm id, hence the assert.
    using (var hasher = CryptographicHashProvider.TryGetAlgorithm(algorithmId))
    {
        Debug.Assert(hasher != null);
        byte[] hash = hasher.ComputeHash(buffer, offset, count);
        return ImmutableArray.Create(hash);
    }
}
// Text stored as a sequence of char[] chunks, with a precomputed starting offset per chunk
// so absolute positions can be mapped to a chunk. The running total of chunk lengths is the
// overall text length.
internal LargeText(ImmutableArray<char[]> chunks, Encoding? encodingOpt, ImmutableArray<byte> checksum, SourceHashAlgorithm checksumAlgorithm)
    : base(checksum, checksumAlgorithm)
{
    _chunks = chunks;
    _encodingOpt = encodingOpt;

    var startOffsets = new int[chunks.Length];
    int runningOffset = 0;
    for (int index = 0; index < startOffsets.Length; index++)
    {
        startOffsets[index] = runningOffset;
        runningOffset += chunks[index].Length;
    }

    _chunkStartOffsets = startOffsets;
    _length = runningOffset;
}
// Convenience constructor for text without a precomputed checksum.
internal LargeText(ImmutableArray<char[]> chunks, Encoding? encodingOpt, SourceHashAlgorithm checksumAlgorithm)
    : this(chunks, encodingOpt, default, checksumAlgorithm)
{
}
/// <summary>
/// Initializes an instance of <see cref="SourceText"/> from the provided stream. This version differs
/// from <see cref="SourceText.From(Stream, Encoding, SourceHashAlgorithm, bool)"/> in two ways:
/// 1. It attempts to minimize allocations by trying to read the stream into a byte array.
/// 2. If <paramref name="defaultEncoding"/> is null, it will first try UTF8 and, if that fails, it will
///    try CodePage 1252. If CodePage 1252 is not available on the system, then it will try Latin1.
/// NOTE(review): the summary says UTF8 is tried first, but the code below passes
/// s_fallbackEncoding on the first attempt — confirm that field's value matches the doc.
/// </summary>
/// <param name="stream">The stream containing encoded text.</param>
/// <param name="defaultEncoding">
/// Specifies an encoding to be used if the actual encoding can't be determined from the stream content (the stream doesn't start with Byte Order Mark).
/// If not specified auto-detect heuristics are used to determine the encoding. If these heuristics fail the decoding is assumed to be <see cref="Encoding.Default"/>.
/// Note that if the stream starts with Byte Order Mark the value of <paramref name="defaultEncoding"/> is ignored.
/// </param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate document checksum.</param>
/// <exception cref="InvalidDataException">
/// The stream content can't be decoded using the specified <paramref name="defaultEncoding"/>, or
/// <paramref name="defaultEncoding"/> is null and the stream appears to be a binary file.
/// </exception>
/// <exception cref="IOException">An IO error occurred while reading from the stream.</exception>
internal static SourceText Create(Stream stream, Encoding defaultEncoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    Debug.Assert(stream != null);
    Debug.Assert(stream.CanRead && stream.CanSeek);

    bool detectEncoding = defaultEncoding == null;
    if (detectEncoding)
    {
        try
        {
            return(Decode(stream, s_fallbackEncoding, checksumAlgorithm, throwIfBinaryDetected: false));
        }
        catch (DecoderFallbackException)
        {
            // Not decodable with the first-choice encoding: fall through and retry below.
        }
    }

    try
    {
        return(Decode(stream, defaultEncoding ?? s_defaultEncoding, checksumAlgorithm, throwIfBinaryDetected: detectEncoding));
    }
    catch (DecoderFallbackException e)
    {
        // Surface the decoding failure as InvalidDataException so callers treat the
        // content as unreadable rather than as a programming error.
        throw new InvalidDataException(e.Message);
    }
}
/// <summary>
/// Hashes the given bytes with the specified source-hash algorithm and returns the digest.
/// </summary>
internal static ImmutableArray<byte> ComputeSourceHash(ImmutableArray<byte> bytes, SourceHashAlgorithm hashAlgorithm = SourceHashAlgorithmUtils.DefaultContentHashAlgorithm)
{
    var name = GetAlgorithmName(hashAlgorithm);

    using (var hasher = IncrementalHash.CreateHash(name))
    {
        hasher.AppendData(bytes.ToArray());
        return ImmutableArray.Create(hasher.GetHashAndReset());
    }
}
// Attempts to read the checksum recorded for sourceFilePath from the project's output PDB.
// Returns true when a document checksum was found (checksum/algorithm are set), false when
// the PDB has no entry for the document, and null when the PDB could not be opened or read.
// All failure paths log rather than throw.
private bool? TryReadSourceFileChecksumFromPdb(string sourceFilePath, Project project, out ImmutableArray<byte> checksum, out SourceHashAlgorithm algorithm)
{
    checksum = default;
    algorithm = default;

    try
    {
        var compilationOutputs = _debuggingSession.GetCompilationOutputs(project);

        DebugInformationReaderProvider? debugInfoReaderProvider;
        try
        {
            debugInfoReaderProvider = compilationOutputs.OpenPdb();
        }
        catch (Exception e)
        {
            // Treat an unreadable PDB the same as a missing one; the null check below logs it.
            EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match output PDB: error opening PDB '{1}': {2}", sourceFilePath, compilationOutputs.PdbDisplayPath, e.Message);
            debugInfoReaderProvider = null;
        }

        if (debugInfoReaderProvider == null)
        {
            EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match output PDB: PDB '{1}' not found", sourceFilePath, compilationOutputs.PdbDisplayPath);
            return(null);
        }

        try
        {
            var debugInfoReader = debugInfoReaderProvider.CreateEditAndContinueMethodDebugInfoReader();
            if (!debugInfoReader.TryGetDocumentChecksum(sourceFilePath, out checksum, out var algorithmId))
            {
                EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match output PDB: no document", sourceFilePath);
                return(false);
            }

            algorithm = SourceHashAlgorithms.GetSourceHashAlgorithm(algorithmId);
            if (algorithm == SourceHashAlgorithm.None)
            {
                // This can only happen if the PDB was post-processed by a misbehaving tool.
                EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match PDB: unknown checksum alg", sourceFilePath);
            }

            // The checksum was found even if the algorithm id was unrecognized.
            return(true);
        }
        catch (Exception e)
        {
            EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match output PDB: error reading symbols: {1}", sourceFilePath, e.Message);
        }
        finally
        {
            debugInfoReaderProvider.Dispose();
        }
    }
    catch (Exception e) when(FatalError.ReportWithoutCrashUnlessCanceled(e))
    {
        EditAndContinueWorkspaceService.Log.Write("Source '{0}' doesn't match PDB: unexpected exception: {1}", sourceFilePath, e.Message);
    }

    // Reached when symbol reading failed above: report "couldn't read" rather than a definite answer.
    return(null);
}
/// <summary>
/// Constructs an <see cref="EmbeddedText"/> from a segment of bytes.
/// </summary>
/// <param name="filePath">The file path (pre-normalization) to use in the PDB.</param>
/// <param name="bytes">The raw content bytes.</param>
/// <param name="checksumAlgorithm">Hash algorithm used to compute the checksum of the text stored in the PDB.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="bytes"/> is default-initialized, or <paramref name="filePath"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="filePath"/> is empty, or <paramref name="checksumAlgorithm"/> is not supported.
/// </exception>
/// <exception cref="IOException">An I/O error occurs.</exception>
/// <remarks>Reads from the beginning of the stream. Leaves the stream open.</remarks>
public static EmbeddedText FromBytes(string filePath, ArraySegment<byte> bytes, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    // Validate all inputs up front before doing any work.
    ValidateFilePath(filePath);

    if (bytes.Array is null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    SourceText.ValidateChecksumAlgorithm(checksumAlgorithm);

    // Compute the checksum over exactly the segment's window, then capture the blob.
    var checksum = SourceText.CalculateChecksum(bytes.Array, bytes.Offset, bytes.Count, checksumAlgorithm);
    var blob = CreateBlob(bytes);

    return new EmbeddedText(filePath, checksum, checksumAlgorithm, blob);
}
/// <summary>
/// Initializes the text with the given string content, optional encoding,
/// and checksum information forwarded to the base type.
/// </summary>
internal StringText(string source, Encoding encodingOpt, ImmutableArray<byte> checksum = default(ImmutableArray<byte>), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
    : base(checksum, checksumAlgorithm)
{
    Debug.Assert(source != null);

    _encodingOpt = encodingOpt;
    _source = source;
}
/// <summary>
/// Creates a writer that accumulates text into a string builder pre-sized to
/// <paramref name="capacity"/>.
/// </summary>
public StringTextWriter(Encoding encoding, SourceHashAlgorithm checksumAlgorithm, int capacity)
{
    _checksumAlgorithm = checksumAlgorithm;
    _encoding = encoding;
    _builder = new StringBuilder(capacity);
}
/// <summary>
/// Reads content of a source file.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics.</param>
/// <param name="encoding">Encoding to use or 'null' for autodetect/default.</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
/// <param name="normalizedFilePath">Set to the normalized absolute path of <paramref name="file"/> when it opens successfully; null otherwise.</param>
/// <returns>File content, or null on failure (with a diagnostic added).</returns>
internal SourceText ReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, out string normalizedFilePath)
{
    try
    {
        // PERF: bufferSize of 1 triggers an optimization in EncodedStringText that reads
        // the whole FileStream into a byte array in one shot; for files smaller than the
        // buffer, FileStream.Read would still allocate its internal buffer.
        using var data = new FileStream(file.Path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, bufferSize: 1);
        normalizedFilePath = data.Name;
        return EncodedStringText.Create(data, encoding, checksumAlgorithm);
    }
    catch (Exception e)
    {
        // Convert any read failure into a diagnostic rather than letting it escape.
        diagnostics.Add(ToFileReadDiagnostics(e, file));
        normalizedFilePath = null;
        return null;
    }
}
/// <summary>
/// Computes the source hash over a sequence of blobs using the specified algorithm.
/// </summary>
internal static ImmutableArray<byte> ComputeSourceHash(IEnumerable<Blob> bytes, SourceHashAlgorithm hashAlgorithm = SourceHashAlgorithmUtils.DefaultContentHashAlgorithm)
    => ComputeHash(GetAlgorithmName(hashAlgorithm), bytes);
private (SourceText?embeddedText, byte[]? compressedHash) ResolveEmbeddedSource(DocumentHandle document, SourceHashAlgorithm hashAlgorithm, Encoding encoding) { byte[] bytes = (from handle in PdbReader.GetCustomDebugInformation(document) let cdi = PdbReader.GetCustomDebugInformation(handle) where PdbReader.GetGuid(cdi.Kind) == EmbeddedSourceGuid select PdbReader.GetBlobBytes(cdi.Value)).SingleOrDefault(); if (bytes == null) { return(default);
/// <summary>
/// Base constructor: validates the checksum algorithm and, when a checksum is supplied,
/// verifies its length matches the algorithm's hash size before storing the state.
/// </summary>
protected SourceText(ImmutableArray<byte> checksum = default(ImmutableArray<byte>), SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1, SourceTextContainer container = null)
{
    ValidateChecksumAlgorithm(checksumAlgorithm);

    // A default (unset) checksum is allowed; a provided one must have the right length.
    if (!checksum.IsDefault)
    {
        var expectedSize = CryptographicHashProvider.GetHashSize(checksumAlgorithm);
        if (checksum.Length != expectedSize)
        {
            throw new ArgumentException(CodeAnalysisResources.InvalidHash, nameof(checksum));
        }
    }

    _lazyChecksum = checksum;
    _lazyContainer = container;
    _checksumAlgorithm = checksumAlgorithm;
}
/// <summary>
/// Decodes <paramref name="length"/> characters from a text reader into a
/// <see cref="SourceText"/>, chunked for large inputs.
/// </summary>
internal static SourceText Decode(TextReader reader, int length, Encoding? encodingOpt, SourceHashAlgorithm checksumAlgorithm)
{
    // Empty input short-circuits to the shared empty-text representation.
    if (length == 0)
    {
        return From(string.Empty, encodingOpt, checksumAlgorithm);
    }

    // throwIfBinaryDetected == false since we are given a text reader from the beginning.
    var chunks = ReadChunksFromTextReader(reader, length, throwIfBinaryDetected: false);

    return new LargeText(chunks, encodingOpt, checksumAlgorithm);
}
/// <summary>
/// Reads content of a source file.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics.</param>
/// <param name="encoding">Encoding to use or 'null' for autodetect/default.</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
/// <returns>File content or null on failure.</returns>
internal SourceText ReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, Encoding encoding, SourceHashAlgorithm checksumAlgorithm)
    // Delegate to the main overload, discarding the normalized path.
    => ReadFileContent(file, diagnostics, encoding, checksumAlgorithm, out _);
/// <summary>
/// Returns true when a GUID mapping exists for the given hash algorithm.
/// </summary>
internal static bool IsSupportedAlgorithm(SourceHashAlgorithm algorithm)
    => TryGetAlgorithmGuid(algorithm, out _);
/// <summary>
/// Creates a <see cref="SourceText"/> from a stream; this overload never marks
/// the text as a candidate for PDB embedding.
/// </summary>
public static SourceText From(Stream stream, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
{
    return From(stream, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded: false);
}
/// <summary>
/// Hashes a slice of <paramref name="buffer"/> with the given algorithm and
/// returns the digest as an immutable array.
/// </summary>
private static ImmutableArray<byte> CalculateChecksum(byte[] buffer, int offset, int count, SourceHashAlgorithm algorithmId)
{
    using var algorithm = CryptographicHashProvider.TryGetAlgorithm(algorithmId);

    // Callers are expected to pass an algorithm id with a registered provider.
    Debug.Assert(algorithm != null);

    var digest = algorithm.ComputeHash(buffer, offset, count);
    return ImmutableArray.Create(digest);
}
/// <summary>
/// Creates a <see cref="SourceText"/> from a byte buffer; this overload never marks
/// the text as a candidate for PDB embedding.
/// </summary>
public static SourceText From(byte[] buffer, int length, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, bool throwIfBinaryDetected)
{
    return From(buffer, length, encoding, checksumAlgorithm, throwIfBinaryDetected, canBeEmbedded: false);
}
/// <summary>
/// Creates a <see cref="SourceText"/> from a stream, forwarding directly to
/// <see cref="EncodedStringText"/>.
/// </summary>
internal static SourceText Create(Stream stream, Lazy<Encoding> getEncoding, Encoding defaultEncoding, SourceHashAlgorithm checksumAlgorithm, bool canBeEmbedded)
{
    return EncodedStringText.Create(stream, getEncoding, defaultEncoding, checksumAlgorithm, canBeEmbedded);
}
/// <summary>
/// Initializes an instance of <see cref="EncodedStringText"/> with provided bytes.
/// </summary>
/// <param name="stream">Readable, seekable stream to decode; read from the beginning.</param>
/// <param name="defaultEncoding">
/// Specifies an encoding to be used if the actual encoding can't be determined from the stream content (the stream doesn't start with Byte Order Mark).
/// If not specified auto-detect heuristics are used to determine the encoding. If these heuristics fail the decoding is assumed to be <see cref="Encoding.Default"/>.
/// Note that if the stream starts with Byte Order Mark the value of <paramref name="defaultEncoding"/> is ignored.
/// </param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate document checksum.</param>
/// <exception cref="InvalidDataException">
/// The stream content can't be decoded using the specified <paramref name="defaultEncoding"/>, or
/// <paramref name="defaultEncoding"/> is null and the stream appears to be a binary file.
/// </exception>
/// <exception cref="IOException">An IO error occurred while reading from the stream.</exception>
internal static EncodedStringText Create(Stream stream, Encoding defaultEncoding = null, SourceHashAlgorithm checksumAlgorithm = SourceHashAlgorithm.Sha1)
{
    Debug.Assert(stream != null);
    Debug.Assert(stream.CanRead && stream.CanSeek);

    // No caller-supplied encoding means we must auto-detect: BOM first, then UTF-8 heuristic.
    bool detectEncoding = defaultEncoding == null;

    string text;
    Encoding preambleEncoding;
    Encoding actualEncoding;

    if (detectEncoding)
    {
        preambleEncoding = TryReadByteOrderMark(stream);
        if (preambleEncoding == null)
        {
            // If we didn't find a recognized byte order mark, check to see if the file contents are valid UTF-8
            // with no byte order mark. Detecting UTF-8 with no byte order mark implicitly decodes the entire stream
            // to check each byte, so we won't decode again unless we've already detected some other encoding or
            // this is not valid UTF-8.
            var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
            try
            {
                // If we successfully decode the content of the stream as UTF8 it is likely not binary,
                // so we don't need to check that.
                text = Decode(stream, utf8NoBom, out actualEncoding);
                return (new EncodedStringText(text, actualEncoding, checksumAlgorithm));
            }
            catch (DecoderFallbackException)
            {
                // fall back to default encoding
            }
        }
    }
    else
    {
        preambleEncoding = null;
    }

    try
    {
        // Precedence: BOM-detected encoding, then the caller's default, then the system default.
        text = Decode(stream, preambleEncoding ?? defaultEncoding ?? Encoding.Default, out actualEncoding);
    }
    catch (DecoderFallbackException e)
    {
        // Surface undecodable content as invalid data rather than a decoder-specific exception.
        throw new InvalidDataException(e.Message);
    }

    // Only reject binary-looking content when we had to guess the encoding ourselves;
    // an explicitly requested encoding is honored as-is.
    if (detectEncoding && IsBinary(text))
    {
        throw new InvalidDataException();
    }

    return (new EncodedStringText(text, actualEncoding, checksumAlgorithm));
}