/// <summary>
/// Opens <paramref name="resolvedPath"/> through <c>OpenRead</c> and turns the resulting stream
/// into a <see cref="SourceText"/>.
/// </summary>
/// <param name="resolvedPath">Path returned by <see cref="ResolveReference(string, string)"/>.</param>
/// <returns>The text created by <c>EncodedStringText.Create</c> from the opened stream.</returns>
public virtual SourceText ReadText(string resolvedPath)
{
    // The stream is only needed while the text is being created, so it is released on the way out.
    using (var contents = OpenRead(resolvedPath))
    {
        var text = EncodedStringText.Create(contents);
        return text;
    }
}
// Writes the same program text to a temp file with each BOM-emitting encoding and verifies that
// EncodedStringText.Create reports the matching code page and reproduces the text exactly.
public void FileStreamEncodedText()
{
    const string expectedText =
        "\r\n" +
        "class Program\r\n" +
        "{\r\n" +
        " static void Main()\r\n" +
        " {\r\n" +
        " string s = \"class C { \u0410\u0411\u0412 x; }\";\r\n" +
        " foreach (char ch in s) System.Console.WriteLine(\"{0:x2}\", (int)ch);\r\n" +
        " }\r\n" +
        "}\r\n";

    // All three encodings are configured to write a byte order mark / UTF-8 identifier.
    Encoding[] candidates =
    {
        new UnicodeEncoding(bigEndian: true, byteOrderMark: true),
        new UnicodeEncoding(bigEndian: false, byteOrderMark: true),
        new UTF8Encoding(encoderShouldEmitUTF8Identifier: true),
    };

    foreach (Encoding candidate in candidates)
    {
        var tempFile = Temp.CreateFile();
        File.WriteAllText(tempFile.Path, expectedText, candidate);

        using (var input = new FileStream(tempFile.Path, FileMode.Open, FileAccess.Read))
        {
            var decoded = EncodedStringText.Create(input);

            Assert.Equal(candidate.CodePage, decoded.Encoding.CodePage);
            Assert.Equal(expectedText, decoded.ToString());
        }
    }
}
// Decodes the same BOM-less UTF-8 file from many parallel workers and checks that every worker
// gets back the original text.
public void TestMultithreadedDecoding()
{
    const string expectedText =
        "\r\n" +
        "class Program\r\n" +
        "{\r\n" +
        " static void Main()\r\n" +
        " {\r\n" +
        " string s = \"class C { \u0410\u0411\u0412 x; }\";\r\n" +
        " foreach (char ch in s) System.Console.WriteLine(\"{0:x2}\", (int)ch);\r\n" +
        " }\r\n" +
        "}\r\n";

    // UTF-8 without the identifier, so the file on disk carries no byte order mark.
    var encoding = new UTF8Encoding(false);
    byte[] payload = encoding.GetBytes(expectedText);
    string path = Temp.CreateFile().WriteAllBytes(payload).Path;

    var parallelOptions = new ParallelOptions();
    parallelOptions.MaxDegreeOfParallelism = Environment.ProcessorCount * 2;

    Parallel.For(0, 500, parallelOptions, iteration =>
    {
        // FileShare.Read lets the concurrent workers open the same file simultaneously.
        using (var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            var decoded = EncodedStringText.Create(input);
            Assert.Equal(expectedText, decoded.ToString());
        }
    });
}
// Text encoded with Encoding.Default must fail a strict UTF-8 decode, while encoding detection
// in EncodedStringText.Create must round-trip it and report Encoding.Default.
public void Decode_NonUtf8()
{
    // Second constructor argument: throw on invalid bytes instead of substituting characters.
    var strictUtf8 = new UTF8Encoding(false, true);

    var highBytes = new byte[] { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF };
    var text = "abc def baz aeiouy " + Encoding.Default.GetString(highBytes);
    var bytes = Encoding.Default.GetBytesWithPreamble(text);

    // Encoding.Default should not decode to UTF-8
    using (var input = new MemoryStream(bytes))
    {
        Assert.Throws<DecoderFallbackException>(() =>
        {
            EncodedStringText.Decode(input, strictUtf8, SourceHashAlgorithm.Sha1);
        });

        // The failed decode must leave the stream open.
        Assert.True(input.CanRead);
    }

    // Detect encoding should correctly pick Encoding.Default
    using (var input = new MemoryStream(bytes))
    {
        var decoded = EncodedStringText.Create(input);

        Assert.Equal(text, decoded.ToString());
        Assert.Equal(Encoding.Default, decoded.Encoding);
        Assert.True(input.CanRead);
    }
}
// Embeds SJIS-encoded source, compiles with SJIS as the fallback encoding, and verifies that the
// text recovered for member C.E carries the same encoding and content.
public async Task EncodedEmbeddedSource_SJIS_FallbackEncoding(Location pdbLocation)
{
    var source = @" public class C { // ワ public event System.EventHandler E { add { } remove { } } }";

    var encoding = Encoding.GetEncoding("SJIS");

    await RunTestAsync(async path =>
    {
        byte[] encodedBytes = encoding.GetBytes(source);
        using var sourceStream = new MemoryStream(encodedBytes);
        var encodedSourceText = EncodedStringText.Create(sourceStream, encoding, canBeEmbedded: true);

        var (project, symbol) = await CompileAndFindSymbolAsync(
            path,
            pdbLocation,
            Location.Embedded,
            encodedSourceText,
            c => c.GetMember("C.E"),
            fallbackEncoding: encoding);

        var (actualText, _) = await GetGeneratedSourceTextAsync(
            project,
            symbol,
            Location.Embedded,
            expectNullResult: false);

        AssertEx.NotNull(actualText);
        AssertEx.NotNull(actualText.Encoding);
        AssertEx.Equal(encoding.WebName, actualText.Encoding.WebName);
        AssertEx.EqualOrDiff(source, actualText.ToString());
    });
}
/// <summary>
/// Builds a <see cref="SourceText"/> from <paramref name="source"/> by encoding it with
/// <see cref="Encoding.Default"/> and decoding the bytes through <c>EncodedStringText.Create</c>.
/// </summary>
/// <param name="source">Text to encode and read back.</param>
/// <returns>The text created from the encoded bytes.</returns>
protected virtual SourceText Create(string source)
{
    byte[] encoded = GetBytes(Encoding.Default, source);

    // Read-only stream over the whole array, with the underlying buffer exposed to the reader.
    var input = new MemoryStream(encoded, 0, encoded.Length, writable: false, publiclyVisible: true);
    using (input)
    {
        return EncodedStringText.Create(input);
    }
}
/// <summary>
/// Builds a <see cref="SourceText"/> from <paramref name="source"/> by encoding it as UTF-8 without
/// the UTF-8 identifier and decoding the bytes through <c>EncodedStringText.Create</c>.
/// </summary>
/// <param name="source">Text to encode and read back.</param>
/// <returns>The text created from the encoded bytes.</returns>
protected override SourceText Create(string source)
{
    var utf8WithoutIdentifier = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
    byte[] encoded = GetBytes(utf8WithoutIdentifier, source);

    // Read-only stream over the whole array, with the underlying buffer exposed to the reader.
    var input = new MemoryStream(encoded, 0, encoded.Length, writable: false, publiclyVisible: true);
    using (input)
    {
        return EncodedStringText.Create(input);
    }
}
/// <summary>
/// Builds a <see cref="SourceText"/> from <paramref name="source"/> by encoding it with
/// <see cref="Encoding.BigEndianUnicode"/> and decoding the bytes through <c>EncodedStringText.Create</c>.
/// </summary>
/// <param name="source">Text to encode and read back.</param>
/// <returns>The text created from the encoded bytes.</returns>
protected override SourceText Create(string source)
{
    byte[] encoded = GetBytes(Encoding.BigEndianUnicode, source);

    // Read-only stream over the whole array, with the underlying buffer exposed to the reader.
    var input = new MemoryStream(encoded, 0, encoded.Length, writable: false, publiclyVisible: true);
    using (input)
    {
        return EncodedStringText.Create(input);
    }
}
// Creates embeddable source text from the bytes A9 0D 0A and checks that the EmbeddedText built
// from it keeps the file path, uses SHA-1, and reports the same checksum as the source text.
public void FromBytes_EncodingFallbackCase()
{
    var rawBytes = new byte[] { 0xA9, 0x0D, 0x0A };
    var input = new MemoryStream(rawBytes);
    var source = EncodedStringText.Create(input, canBeEmbedded: true);

    var text = EmbeddedText.FromSource("pathToLarge", source);

    Assert.Equal("pathToLarge", text.FilePath);
    Assert.Equal(SourceHashAlgorithm.Sha1, text.ChecksumAlgorithm);
    AssertEx.Equal(source.GetChecksum(), text.Checksum);
}
/// <summary>
/// Reads the generated file from disk, registers it with the metadata-as-source service, and
/// returns the single document related to the resulting text container.
/// </summary>
/// <param name="file">Generated file whose <c>FilePath</c> is opened and registered.</param>
/// <returns>The only document related to the text container.</returns>
internal Document GetDocument(MetadataAsSourceFile file)
{
    using (var fileStream = File.OpenRead(file.FilePath))
    {
        var stringText = EncodedStringText.Create(fileStream);

        // Registration must succeed before the related document is looked up.
        bool added = _metadataAsSourceService.TryAddDocumentToWorkspace(file.FilePath, stringText.Container);
        Assert.True(added);

        return stringText.Container.GetRelatedDocuments().Single();
    }
}
/// <summary>
/// Creates a <see cref="SourceText"/> from <paramref name="stream"/> by delegating to
/// <c>EncodedStringText.Create</c>.
/// </summary>
/// <param name="stream">Stream holding the encoded text.</param>
/// <param name="defaultEncoding">Encoding passed through to <c>EncodedStringText.Create</c>; may be null.</param>
/// <param name="cancellationToken">Checked once before any work is done.</param>
/// <returns>The created text.</returns>
public SourceText CreateText(Stream stream, Encoding? defaultEncoding, CancellationToken cancellationToken = default)
{
    // Cancellation is only observed up front; the creation call itself does not take the token.
    cancellationToken.ThrowIfCancellationRequested();

    var text = EncodedStringText.Create(stream, defaultEncoding);
    return text;
}
// A freshly created temp file that was never written to must produce text of length zero.
public void FileStreamEncodedTextEmpty()
{
    var emptyFile = Temp.CreateFile();

    using (var input = new FileStream(emptyFile.Path, FileMode.Open, FileAccess.Read))
    {
        var decoded = EncodedStringText.Create(input);
        Assert.Equal(0, decoded.Length);
    }
}
/// <summary>
/// Creates a <see cref="SourceText"/> from <paramref name="bytes"/> via a memory stream that covers
/// only the leading part of a deliberately oversized backing array.
/// </summary>
/// <param name="bytes">Encoded content to read.</param>
/// <param name="readEncodingOpt">Encoding passed to <c>EncodedStringText.Create</c>.</param>
/// <param name="algorithm">Checksum algorithm passed to <c>EncodedStringText.Create</c>.</param>
/// <returns>The created text.</returns>
private static SourceText CreateMemoryStreamBasedEncodedText(byte[] bytes, Encoding readEncodingOpt, SourceHashAlgorithm algorithm = SourceHashAlgorithm.Sha1)
{
    // For testing purposes the backing array is 10 bytes longer than the content, so that we verify
    // the implementation only uses the part covered by the stream and not the entire array.
    var padded = new byte[bytes.Length + 10];
    bytes.CopyTo(padded, 0);

    var window = new MemoryStream(padded, 0, bytes.Length, writable: true, publiclyVisible: true);
    using (window)
    {
        return EncodedStringText.Create(window, readEncodingOpt, algorithm);
    }
}
// A lone 0x85 byte must decode to U+2026 (horizontal ellipsis).
public void HorizontalEllipsis()
{
    // Character 0x85 in CodePage 1252 is a horizontal ellipsis.
    // If decoded as Latin-1, then it's incorrectly treated as \u0085 which
    // is a line break ('NEXT LINE').
    var ellipsisByte = new byte[] { 0x85 };

    using (var input = new MemoryStream(ellipsisByte))
    {
        var decoded = EncodedStringText.Create(input);
        char first = decoded[0];
        Assert.Equal('\u2026', first);
    }
}
/// <summary>
/// Creates a <see cref="SourceText"/> from <paramref name="stream"/> and disposes the stream,
/// whether creation succeeds or throws.
/// </summary>
/// <remarks>
/// The encoding is chosen in this order: the one reported by <c>EncodedStringText.TryReadByteOrderMark</c>,
/// then <paramref name="defaultEncoding"/>, then UTF-8 configured to throw on invalid bytes.
/// If <c>CreateTextInternal</c> returns null for that encoding, a second attempt is made with
/// <see cref="Encoding.Default"/>.
/// </remarks>
/// <param name="stream">Stream holding the encoded text; it is disposed by this method.</param>
/// <param name="defaultEncoding">Encoding to use when no byte order mark is found; may be null.</param>
/// <param name="cancellationToken">Token forwarded to <c>CreateTextInternal</c>.</param>
/// <returns>The created text.</returns>
public SourceText CreateText(Stream stream, Encoding defaultEncoding, CancellationToken cancellationToken = default(CancellationToken))
{
    // Close the stream here since we might throw an exception trying to determine the encoding.
    // Byte order mark detection reads from the stream too, so it must run inside the using block;
    // otherwise a failure during detection would leave the stream open.
    using (stream)
    {
        var encoding = EncodedStringText.TryReadByteOrderMark(stream)
            ?? defaultEncoding
            ?? new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

        return CreateTextInternal(stream, encoding, cancellationToken)
            ?? CreateTextInternal(stream, Encoding.Default, cancellationToken);
    }
}
// UTF-8 encoded text containing non-ASCII letters must be detected and decoded back unchanged,
// and the stream must remain readable afterwards.
public void Decode_Utf8()
{
    var text = "abc def baz aeiouy äëïöüû";
    var strictUtf8 = new UTF8Encoding(false, true);
    var bytes = GetBytes(strictUtf8, text);

    // Detect encoding should correctly pick UTF-8
    using (var input = new MemoryStream(bytes))
    {
        var decoded = EncodedStringText.DetectEncodingAndDecode(input);

        Assert.Equal(text, decoded);
        Assert.True(input.CanRead);
    }
}
// Incomplete byte order marks must yield null; complete ones must yield an encoding with the
// expected display name.
public void TryReadByteOrderMark()
{
    // Empty input.
    var fromEmpty = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[0]));
    Assert.Null(fromEmpty);

    // UTF-8: EF BB BF, with one- and two-byte prefixes rejected.
    var fromEf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef }));
    Assert.Null(fromEf);

    var fromEfBb = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xbb }));
    Assert.Null(fromEfBb);

    var fromUtf8Mark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xBB, 0xBF }));
    Assert.Equal("Unicode (UTF-8)", fromUtf8Mark.EncodingName);

    // UTF-16 little-endian: FF FE.
    var fromFf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff }));
    Assert.Null(fromFf);

    var fromLittleEndianMark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff, 0xfe }));
    Assert.Equal("Unicode", fromLittleEndianMark.EncodingName);

    // UTF-16 big-endian: FE FF.
    var fromFe = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe }));
    Assert.Null(fromFe);

    var fromBigEndianMark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe, 0xff }));
    Assert.Equal("Unicode (Big-Endian)", fromBigEndianMark.EncodingName);
}
// Incomplete byte order marks must yield null; complete ones must yield an encoding equal to the
// corresponding well-known Encoding instance.
public void TryReadByteOrderMark()
{
    // Empty input.
    var fromEmpty = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[0]));
    Assert.Null(fromEmpty);

    // UTF-8: EF BB BF, with one- and two-byte prefixes rejected.
    var fromEf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef }));
    Assert.Null(fromEf);

    var fromEfBb = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xbb }));
    Assert.Null(fromEfBb);

    var fromUtf8Mark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xBB, 0xBF }));
    Assert.Equal(Encoding.UTF8, fromUtf8Mark);

    // UTF-16 little-endian: FF FE.
    var fromFf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff }));
    Assert.Null(fromFf);

    var fromLittleEndianMark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff, 0xfe }));
    Assert.Equal(Encoding.Unicode, fromLittleEndianMark);

    // UTF-16 big-endian: FE FF.
    var fromFe = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe }));
    Assert.Null(fromFe);

    var fromBigEndianMark = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe, 0xff }));
    Assert.Equal(Encoding.BigEndianUnicode, fromBigEndianMark);
}
/// <summary>
/// Encodes <paramref name="text"/> (including the preamble) with <paramref name="writeEncoding"/> and
/// reads it back via a memory stream that covers only the leading part of an oversized backing array.
/// </summary>
/// <param name="text">Text to encode.</param>
/// <param name="writeEncoding">Encoding used to produce the bytes.</param>
/// <param name="readEncodingOpt">Encoding passed to <c>EncodedStringText.Create</c>.</param>
/// <returns>The text created from the encoded bytes.</returns>
private static EncodedStringText CreateMemoryStreamBasedEncodedText(string text, Encoding writeEncoding, Encoding readEncodingOpt)
{
    byte[] encoded = writeEncoding.GetBytesWithPreamble(text);

    // For testing purposes the backing array is 10 bytes longer than the content, so that we verify
    // the implementation only uses the part covered by the stream and not the entire array.
    var padded = new byte[encoded.Length + 10];
    encoded.CopyTo(padded, 0);

    var window = new MemoryStream(padded, 0, encoded.Length, writable: true, publiclyVisible: true);
    using (window)
    {
        return EncodedStringText.Create(window, readEncodingOpt);
    }
}
// UTF-8 encoded text containing non-ASCII letters must be detected as UTF-8 and decoded back
// unchanged, and the stream must remain readable afterwards.
public void Decode_Utf8()
{
    var text = "abc def baz aeiouy äëïöüû";
    var strictUtf8 = new UTF8Encoding(false, true);
    var bytes = strictUtf8.GetBytesWithPreamble(text);

    // Detect encoding should correctly pick UTF-8
    using (var input = new MemoryStream(bytes))
    {
        var decoded = EncodedStringText.Create(input);

        Assert.Equal(text, decoded.ToString());
        Assert.Equal(Encoding.UTF8.EncodingName, decoded.Encoding.EncodingName);
        Assert.True(input.CanRead);
    }
}
/// <summary>
/// Produces a syntax tree by parsing the source file.
/// </summary>
/// <param name="path">Path of the file to read; also passed to <c>ParseText</c> as the tree's path.</param>
/// <param name="options">Parse options forwarded to <c>ParseText</c>; may be null.</param>
/// <param name="cancellationToken">Token forwarded to <c>ParseText</c>.</param>
/// <returns>The syntax tree returned by <c>ParseText</c> for the file's decoded text.</returns>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
public static SyntaxTree ParseFile(
    string path,
    CSharpParseOptions options = null,
    CancellationToken cancellationToken = default(CancellationToken))
{
    if (string.IsNullOrEmpty(path))
    {
        // The single-string constructor takes a message, not a parameter name, so use the two-argument
        // overload to make both the message and ParamName meaningful.
        throw new ArgumentException("The path must not be null or empty.", "path");
    }

    // FileShare.ReadWrite allows the file to be opened even while another handle is reading or writing it.
    using (var data = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    {
        return ParseText(EncodedStringText.Create(data), options, path, cancellationToken);
    }
}
/// <summary>
/// Reads content of a source file.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics; receives one entry if reading fails.</param>
/// <param name="encoding">Encoding to use or 'null' for autodetect/default</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
/// <param name="normalizedFilePath">If given <paramref name="file"/> opens successfully, set to normalized absolute path of the file, null otherwise.</param>
/// <returns>File content or null on failure.</returns>
internal SourceText ReadFileContent(
    CommandLineSourceFile file,
    IList<DiagnosticInfo> diagnostics,
    Encoding encoding,
    SourceHashAlgorithm checksumAlgorithm,
    out string normalizedFilePath)
{
    try
    {
        using (var input = new FileStream(file.Path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            // The path is taken from the opened stream rather than from the command-line argument.
            normalizedFilePath = input.Name;

            var content = EncodedStringText.Create(input, encoding, checksumAlgorithm);
            return content;
        }
    }
    catch (Exception e)
    {
        // Any failure to open or decode the file is reported as a diagnostic instead of propagating.
        normalizedFilePath = null;
        diagnostics.Add(ToFileReadDiagnostics(e, file));
        return null;
    }
}
// Pins IsBinary: strings with at most isolated NUL characters are text, strings containing two
// consecutive NUL characters are binary.
public void IsBinary()
{
    // No NULs, or only isolated ones, in any position.
    Assert.False(EncodedStringText.IsBinary(""));
    Assert.False(EncodedStringText.IsBinary("\0abc"));
    Assert.False(EncodedStringText.IsBinary("a\0bc"));
    Assert.False(EncodedStringText.IsBinary("abc\0"));
    Assert.False(EncodedStringText.IsBinary("a\0b\0c"));

    // Two consecutive NULs at the start, middle, or end.
    Assert.True(EncodedStringText.IsBinary("\0\0abc"));
    Assert.True(EncodedStringText.IsBinary("a\0\0bc"));
    Assert.True(EncodedStringText.IsBinary("abc\0\0"));

    var codePage1252 = Encoding.GetEncoding(1252);

    var highBytes = new byte[] { 0x81, 0x8D, 0x8F, 0x90, 0x9D };
    Assert.False(EncodedStringText.IsBinary(codePage1252.GetString(highBytes)));

    Assert.False(EncodedStringText.IsBinary("abc def baz aeiouy äëïöüû"));

    // The bytes of a test-resource assembly, decoded as code page 1252, must be classified as binary.
    var assemblyAsText = codePage1252.GetString(ProprietaryTestResources.NetFX.v4_0_30319.System);
    Assert.True(EncodedStringText.IsBinary(assemblyAsText));
}
// ContentEquals must compare characters only: texts created from strings and from streams in
// different encodings are equal whenever their content is "foo".
public void ContentEquals()
{
    var f = SourceText.From("foo", Encoding.UTF8);

    // String-based texts.
    var sameContent = SourceText.From("foo", Encoding.UTF8);
    Assert.True(f.ContentEquals(sameContent));

    var longerContent = SourceText.From("fooo", Encoding.UTF8);
    Assert.False(f.ContentEquals(longerContent));

    var left = SourceText.From("foo", Encoding.UTF8);
    var right = SourceText.From("foo", Encoding.UTF8);
    Assert.True(left.ContentEquals(right));

    // Stream-based texts, one per encoding.
    var utf16Stream = new MemoryStream(Encoding.Unicode.GetBytes("foo"));
    var e1 = EncodedStringText.Create(utf16Stream, Encoding.Unicode);

    var utf8Stream = new MemoryStream(Encoding.UTF8.GetBytes("foo"));
    var e2 = EncodedStringText.Create(utf8Stream, Encoding.UTF8);

    Assert.True(e1.ContentEquals(e1));
    Assert.True(f.ContentEquals(e1));
    Assert.True(e1.ContentEquals(f));

    Assert.True(e2.ContentEquals(e2));
    Assert.True(e1.ContentEquals(e2));
    Assert.True(e2.ContentEquals(e1));
}
// ContentEquals must compare characters only: texts created from strings and from streams in
// different encodings are equal whenever their content is HelloWorld.
public void ContentEquals()
{
    var f = SourceText.From(HelloWorld, s_utf8);

    // String-based texts.
    var sameContent = SourceText.From(HelloWorld, s_utf8);
    Assert.True(f.ContentEquals(sameContent));

    var longerContent = SourceText.From(HelloWorld + "o", s_utf8);
    Assert.False(f.ContentEquals(longerContent));

    var left = SourceText.From(HelloWorld, s_utf8);
    var right = SourceText.From(HelloWorld, s_utf8);
    Assert.True(left.ContentEquals(right));

    // Stream-based texts, one per encoding.
    var unicodeStream = new MemoryStream(s_unicode.GetBytes(HelloWorld));
    var e1 = EncodedStringText.Create(unicodeStream, s_unicode);

    var utf8Stream = new MemoryStream(s_utf8.GetBytes(HelloWorld));
    var e2 = EncodedStringText.Create(utf8Stream, s_utf8);

    Assert.True(e1.ContentEquals(e1));
    Assert.True(f.ContentEquals(e1));
    Assert.True(e1.ContentEquals(f));

    Assert.True(e2.ContentEquals(e2));
    Assert.True(e1.ContentEquals(e2));
    Assert.True(e2.ContentEquals(e1));
}
/// <summary>
/// Reads content of a source file, using the encoding and checksum algorithm from <c>Arguments</c>.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics; receives one entry if reading fails.</param>
/// <param name="normalizedFilePath">If given <paramref name="file"/> opens successfully, set to normalized absolute path of the file, null otherwise.</param>
/// <returns>File content or null on failure.</returns>
internal SourceText TryReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, out string normalizedFilePath)
{
    var filePath = file.Path;

    try
    {
        using (var input = OpenFileForReadWithSmallBufferOptimization(filePath))
        {
            // The path is taken from the opened stream rather than from the command-line argument.
            normalizedFilePath = input.Name;

            var content = EncodedStringText.Create(
                input,
                Arguments.Encoding,
                Arguments.ChecksumAlgorithm,
                canBeEmbedded: EmbeddedSourcePaths.Contains(file.Path));
            return content;
        }
    }
    catch (Exception e)
    {
        // Any failure to open or decode the file is reported as a diagnostic instead of propagating.
        normalizedFilePath = null;
        diagnostics.Add(ToFileReadDiagnostics(this.MessageProvider, e, filePath));
        return null;
    }
}
/// <summary>
/// Looks through the custom debug information attached to <paramref name="handle"/> for an
/// embedded-source record and, if one is found, decodes it into a <see cref="SourceText"/>.
/// </summary>
/// <remarks>
/// The record's blob starts with a 32-bit size. Zero means the remaining bytes are used as-is;
/// any other value means the remaining bytes are deflate-compressed and must inflate to exactly
/// that many bytes.
/// </remarks>
/// <param name="handle">Document whose custom debug information is searched.</param>
/// <returns>
/// The embedded text, or null when there is no embedded-source record or the inflated length
/// does not match the recorded size.
/// </returns>
private SourceText? TryGetEmbeddedSourceText(DocumentHandle handle)
{
    foreach (var cdiHandle in _pdbReader.GetCustomDebugInformation(handle))
    {
        var cdi = _pdbReader.GetCustomDebugInformation(cdiHandle);

        // Only embedded-source records are of interest.
        if (_pdbReader.GetGuid(cdi.Kind) != PortableCustomDebugInfoKinds.EmbeddedSource)
        {
            continue;
        }

        var blob = _pdbReader.GetBlobBytes(cdi.Value);
        if (blob is null)
        {
            continue;
        }

        var uncompressedSize = BitConverter.ToInt32(blob, 0);

        // Everything after the size prefix is the payload.
        var stream = new MemoryStream(blob, sizeof(int), blob.Length - sizeof(int));

        if (uncompressedSize != 0)
        {
            var decompressed = new MemoryStream(uncompressedSize);

            using (var deflater = new DeflateStream(stream, CompressionMode.Decompress))
            {
                deflater.CopyTo(decompressed);
            }

            // A size mismatch means the record cannot be trusted.
            if (decompressed.Length != uncompressedSize)
            {
                return null;
            }

            stream = decompressed;
        }

        using (stream)
        {
            return EncodedStringText.Create(stream);
        }
    }

    return null;
}
// Bytes that are valid in code page 1252 but illegal as UTF-8 must fail a strict UTF-8 decode,
// while encoding detection must decode them as code page 1252 with a fully usable Encoding.
public void Decode_NonUtf8()
{
    // Unicode text with extended characters that map to interesting code points in CodePage 1252.
    var text = "abc def baz aeiouy \u20ac\u2019\u00a4\u00b6\u00c9\u00db\u00ed\u00ff";

    // The same text encoded in CodePage 1252 which happens to be an illegal sequence if decoded as Utf-8.
    var bytes = new byte[]
    {
        0x61, 0x62, 0x63, 0x20,
        0x64, 0x65, 0x66, 0x20,
        0x62, 0x61, 0x7a, 0x20,
        0x61, 0x65, 0x69, 0x6f, 0x75, 0x79, 0x20,
        0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF
    };

    // Second constructor argument: throw on invalid bytes instead of substituting characters.
    var strictUtf8 = new UTF8Encoding(false, true);

    // bytes should not decode to UTF-8
    using (var input = new MemoryStream(bytes))
    {
        Assert.Throws<DecoderFallbackException>(() =>
        {
            EncodedStringText.Decode(input, strictUtf8, SourceHashAlgorithm.Sha1);
        });

        // The failed decode must leave the stream open.
        Assert.True(input.CanRead);
    }

    // Detect encoding should correctly pick CodePage 1252
    using (var input = new MemoryStream(bytes))
    {
        var decoded = EncodedStringText.Create(input);
        Assert.Equal(text, decoded.ToString());

        // Check for a complete Encoding implementation.
        var detected = decoded.Encoding;
        Assert.Equal(1252, detected.CodePage);
        Assert.NotNull(detected.GetEncoder());
        Assert.NotNull(detected.GetDecoder());
        Assert.Equal(2, detected.GetMaxByteCount(1));
        Assert.Equal(1, detected.GetMaxCharCount(1));
        Assert.Equal(text, detected.GetString(bytes));

        Assert.True(input.CanRead);
    }
}
/// <summary>
/// Reads content of a source file.
/// </summary>
/// <param name="file">Source file information.</param>
/// <param name="diagnostics">Storage for diagnostics; receives one entry if reading fails.</param>
/// <param name="encoding">Encoding to use or 'null' for autodetect/default</param>
/// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
/// <param name="normalizedFilePath">If given <paramref name="file"/> opens successfully, set to normalized absolute path of the file, null otherwise.</param>
/// <returns>File content or null on failure.</returns>
internal SourceText ReadFileContent(
    CommandLineSourceFile file,
    IList<DiagnosticInfo> diagnostics,
    Encoding encoding,
    SourceHashAlgorithm checksumAlgorithm,
    out string normalizedFilePath)
{
    try
    {
        // PERF: Using a very small buffer size for the FileStream opens up an optimization within EncodedStringText where
        // we read the entire FileStream into a byte array in one shot. For files that are actually smaller than the buffer
        // size, FileStream.Read still allocates the internal buffer.
        using (var input = PortableShim.FileStream.Create(
            file.Path,
            PortableShim.FileMode.Open,
            PortableShim.FileAccess.Read,
            PortableShim.FileShare.ReadWrite,
            bufferSize: 1,
            options: PortableShim.FileOptions.None))
        {
            // The stream's Name is read through the shim's property accessor and cast back to string.
            normalizedFilePath = (string)PortableShim.FileStream.Name.GetValue(input);

            var content = EncodedStringText.Create(input, encoding, checksumAlgorithm);
            return content;
        }
    }
    catch (Exception e)
    {
        // Any failure to open or decode the file is reported as a diagnostic instead of propagating.
        normalizedFilePath = null;
        diagnostics.Add(ToFileReadDiagnostics(e, file));
        return null;
    }
}
// Text encoded with Encoding.Default must fail a strict UTF-8 decode, while
// DetectEncodingAndDecode must return the original text.
public void Decode_NonUtf8()
{
    // Second constructor argument: throw on invalid bytes instead of substituting characters.
    var strictUtf8 = new UTF8Encoding(false, true);

    var highBytes = new byte[] { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF };
    var text = "abc def baz aeiouy " + Encoding.Default.GetString(highBytes);
    var bytes = GetBytes(Encoding.Default, text);

    // Encoding.Default should not decode to UTF-8
    using (var input = new MemoryStream(bytes))
    {
        Assert.Throws(typeof(DecoderFallbackException), () => EncodedStringText.Decode(input, strictUtf8));

        // The failed decode must leave the stream open.
        Assert.True(input.CanRead);
    }

    // Detect encoding should correctly pick Encoding.Default
    using (var input = new MemoryStream(bytes))
    {
        var decoded = EncodedStringText.DetectEncodingAndDecode(input);

        Assert.Equal(text, decoded);
        Assert.True(input.CanRead);
    }
}