/// <summary>
 /// Opens <paramref name="resolvedPath"/> and decodes its contents into a <see cref="SourceText"/>.
 /// </summary>
 /// <param name="resolvedPath">Path returned by <see cref="ResolveReference(string, string)"/>.</param>
 /// <returns>The text created from the opened stream.</returns>
 public virtual SourceText ReadText(string resolvedPath)
 {
     // The stream is disposed as soon as the text has been created from it.
     using (var input = OpenRead(resolvedPath))
     {
         SourceText text = EncodedStringText.Create(input);
         return text;
     }
 }
        public void FileStreamEncodedText()
        {
            // Sample program text; the string literal inside it contains non-ASCII characters.
            const string expectedText =
                "\r\n" +
                "class Program\r\n" +
                "{\r\n" +
                "    static void Main()\r\n" +
                "    {\r\n" +
                "        string s = \"class C { \u0410\u0411\u0412 x; }\";\r\n" +
                "        foreach (char ch in s) System.Console.WriteLine(\"{0:x2}\", (int)ch);\r\n" +
                "    }\r\n" +
                "}\r\n";

            // Every encoding below is configured to emit a byte order mark.
            Encoding[] bomEncodings =
            {
                new UnicodeEncoding(bigEndian: true, byteOrderMark: true),
                new UnicodeEncoding(bigEndian: false, byteOrderMark: true),
                new UTF8Encoding(encoderShouldEmitUTF8Identifier: true),
            };

            foreach (Encoding writeEncoding in bomEncodings)
            {
                var tempFile = Temp.CreateFile();
                File.WriteAllText(tempFile.Path, expectedText, writeEncoding);

                using (var input = new FileStream(tempFile.Path, FileMode.Open, FileAccess.Read))
                {
                    var decoded = EncodedStringText.Create(input);

                    // The decoded text must report the code page it was written with and round-trip the content.
                    Assert.Equal(writeEncoding.CodePage, decoded.Encoding.CodePage);
                    Assert.Equal(expectedText, decoded.ToString());
                }
            }
        }
        public void TestMultithreadedDecoding()
        {
            const string expectedText =
                "\r\n" +
                "class Program\r\n" +
                "{\r\n" +
                "    static void Main()\r\n" +
                "    {\r\n" +
                "        string s = \"class C { \u0410\u0411\u0412 x; }\";\r\n" +
                "        foreach (char ch in s) System.Console.WriteLine(\"{0:x2}\", (int)ch);\r\n" +
                "    }\r\n" +
                "}\r\n";

            // Encode the text as UTF-8 and store the raw bytes in a temp file.
            var utf8 = new UTF8Encoding(false);
            byte[] fileBytes = utf8.GetBytes(expectedText);
            string path = Temp.CreateFile().WriteAllBytes(fileBytes).Path;

            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = Environment.ProcessorCount * 2;

            // Decode the same file 500 times in parallel; every iteration opens its own read-shared stream.
            Parallel.For(0, 500, options, iteration =>
            {
                using (var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    var decoded = EncodedStringText.Create(input);
                    Assert.Equal(expectedText, decoded.ToString());
                }
            });
        }
        public void Decode_NonUtf8()
        {
            // UTF-8 configured to throw on invalid byte sequences.
            var strictUtf8 = new UTF8Encoding(false, true);

            byte[] highBytes = { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF };
            var text = "abc def baz aeiouy " + Encoding.Default.GetString(highBytes);
            var bytes = Encoding.Default.GetBytesWithPreamble(text);

            // Encoding.Default should not decode to UTF-8
            using (var input = new MemoryStream(bytes))
            {
                Assert.Throws<DecoderFallbackException>(() =>
                {
                    EncodedStringText.Decode(input, strictUtf8, SourceHashAlgorithm.Sha1);
                });

                // A failed decode must leave the stream usable.
                Assert.True(input.CanRead);
            }

            // Detect encoding should correctly pick Encoding.Default
            using (var input = new MemoryStream(bytes))
            {
                var decoded = EncodedStringText.Create(input);

                Assert.Equal(text, decoded.ToString());
                Assert.Equal(Encoding.Default, decoded.Encoding);
                Assert.True(input.CanRead);
            }
        }
Example #5
0
        public async Task EncodedEmbeddedSource_SJIS_FallbackEncoding(Location pdbLocation)
        {
            var source = @"
public class C
{
    // ワ
    public event System.EventHandler E { add { } remove { } }
}";

            var sjis = Encoding.GetEncoding("SJIS");

            await RunTestAsync(async path =>
            {
                // Encode the source with the SJIS encoding and create an embeddable text from those bytes.
                var sourceBytes = sjis.GetBytes(source);
                using var input = new MemoryStream(sourceBytes);
                var embeddableText = EncodedStringText.Create(input, sjis, canBeEmbedded: true);

                var (project, symbol) = await CompileAndFindSymbolAsync(path, pdbLocation, Location.Embedded, embeddableText, c => c.GetMember("C.E"), fallbackEncoding: sjis);

                var (generatedText, _) = await GetGeneratedSourceTextAsync(project, symbol, Location.Embedded, expectNullResult: false);

                // The generated text must carry the same encoding name and the same content as the original source.
                AssertEx.NotNull(generatedText);
                AssertEx.NotNull(generatedText.Encoding);
                AssertEx.Equal(sjis.WebName, generatedText.Encoding.WebName);
                AssertEx.EqualOrDiff(source, generatedText.ToString());
            });
        }
 /// <summary>
 /// Creates a <see cref="SourceText"/> by encoding <paramref name="source"/> with <see cref="Encoding.Default"/>
 /// and decoding the resulting bytes from a read-only memory stream.
 /// </summary>
 /// <param name="source">Text to encode and read back.</param>
 protected virtual SourceText Create(string source)
 {
     var encoded = GetBytes(Encoding.Default, source);

     // The stream spans the whole array, is read-only and exposes its underlying buffer.
     using (var input = new MemoryStream(encoded, index: 0, count: encoded.Length, writable: false, publiclyVisible: true))
     {
         SourceText text = EncodedStringText.Create(input);
         return text;
     }
 }
 /// <summary>
 /// Creates a <see cref="SourceText"/> by encoding <paramref name="source"/> as UTF-8 with the
 /// UTF-8 identifier disabled and decoding the resulting bytes from a read-only memory stream.
 /// </summary>
 /// <param name="source">Text to encode and read back.</param>
 protected override SourceText Create(string source)
 {
     var utf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
     var encoded = GetBytes(utf8, source);

     // The stream spans the whole array, is read-only and exposes its underlying buffer.
     using (var input = new MemoryStream(encoded, index: 0, count: encoded.Length, writable: false, publiclyVisible: true))
     {
         SourceText text = EncodedStringText.Create(input);
         return text;
     }
 }
 /// <summary>
 /// Creates a <see cref="SourceText"/> by encoding <paramref name="source"/> with
 /// <see cref="Encoding.BigEndianUnicode"/> and decoding the resulting bytes from a read-only memory stream.
 /// </summary>
 /// <param name="source">Text to encode and read back.</param>
 protected override SourceText Create(string source)
 {
     var encoded = GetBytes(Encoding.BigEndianUnicode, source);

     // The stream spans the whole array, is read-only and exposes its underlying buffer.
     using (var input = new MemoryStream(encoded, index: 0, count: encoded.Length, writable: false, publiclyVisible: true))
     {
         SourceText text = EncodedStringText.Create(input);
         return text;
     }
 }
Example #9
0
        public void FromBytes_EncodingFallbackCase()
        {
            // Input is the single byte 0xA9 followed by CR LF.
            byte[] rawBytes = { 0xA9, 0x0D, 0x0A };
            var source = EncodedStringText.Create(new MemoryStream(rawBytes), canBeEmbedded: true);

            var embedded = EmbeddedText.FromSource("pathToLarge", source);

            // The embedded text keeps the path and mirrors the checksum data of the source it was built from.
            Assert.Equal("pathToLarge", embedded.FilePath);
            Assert.Equal(SourceHashAlgorithm.Sha1, embedded.ChecksumAlgorithm);
            AssertEx.Equal(source.GetChecksum(), embedded.Checksum);
        }
Example #10
0
            /// <summary>
            /// Reads the file at <c>file.FilePath</c>, asserts that it can be added to the workspace,
            /// and returns the single document related to the text's container.
            /// </summary>
            /// <param name="file">File whose <c>FilePath</c> is opened and registered.</param>
            internal Document GetDocument(MetadataAsSourceFile file)
            {
                using var fileStream = File.OpenRead(file.FilePath);
                var text = EncodedStringText.Create(fileStream);

                var added = _metadataAsSourceService.TryAddDocumentToWorkspace(file.FilePath, text.Container);
                Assert.True(added);

                // Single() throws unless exactly one related document exists.
                var relatedDocuments = text.Container.GetRelatedDocuments();
                return relatedDocuments.Single();
            }
Example #11
0
 /// <summary>
 /// Creates a <see cref="SourceText"/> from <paramref name="stream"/>.
 /// </summary>
 /// <param name="stream">Stream to read the text from.</param>
 /// <param name="defaultEncoding">Encoding forwarded to the text factory; may be null.</param>
 /// <param name="cancellationToken">Checked once before any work is done.</param>
 /// <returns>The created text.</returns>
 public SourceText CreateText(Stream stream, Encoding? defaultEncoding, CancellationToken cancellationToken = default)
 {
     // Cancellation is only observed up front; the token is not passed on to the factory call.
     cancellationToken.ThrowIfCancellationRequested();

     SourceText text = EncodedStringText.Create(stream, defaultEncoding);
     return text;
 }
Example #12
0
        public void FileStreamEncodedTextEmpty()
        {
            // Nothing is written to the temp file by this test before it is read back.
            var tempFile = Temp.CreateFile();

            using (var input = new FileStream(tempFile.Path, FileMode.Open, FileAccess.Read))
            {
                var decoded = EncodedStringText.Create(input);

                Assert.Equal(0, decoded.Length);
            }
        }
Example #13
0
        /// <summary>
        /// Creates a <see cref="SourceText"/> from <paramref name="bytes"/> through a memory stream that covers
        /// only the first <c>bytes.Length</c> bytes of a larger backing array.
        /// </summary>
        /// <param name="bytes">Encoded content to read.</param>
        /// <param name="readEncodingOpt">Encoding forwarded to the text factory.</param>
        /// <param name="algorithm">Checksum algorithm forwarded to the text factory.</param>
        private static SourceText CreateMemoryStreamBasedEncodedText(byte[] bytes, Encoding readEncodingOpt, SourceHashAlgorithm algorithm = SourceHashAlgorithm.Sha1)
        {
            // For testing purposes the backing array is 10 bytes longer than the content, so that we verify
            // the implementation only uses the part covered by the stream and not the entire array.
            int contentLength = bytes.Length;
            var padded = new byte[contentLength + 10];
            bytes.CopyTo(padded, 0);

            using (var window = new MemoryStream(padded, 0, contentLength, writable: true, publiclyVisible: true))
            {
                SourceText text = EncodedStringText.Create(window, readEncodingOpt, algorithm);
                return text;
            }
        }
Example #14
0
 public void HorizontalEllipsis()
 {
     // In CodePage 1252 the byte 0x85 is a horizontal ellipsis. Decoding it as Latin-1
     // would instead yield \u0085 ('NEXT LINE'), which is a line break.
     var singleByte = new byte[] { 0x85 };

     using (var input = new MemoryStream(singleByte))
     {
         var decoded = EncodedStringText.Create(input);

         Assert.Equal('\u2026', decoded[0]);
     }
 }
        /// <summary>
        /// Creates a <see cref="SourceText"/> from <paramref name="stream"/> and disposes the stream before returning.
        /// </summary>
        /// <param name="stream">Stream to read. It is disposed by this method, also when detecting the encoding or creating the text throws.</param>
        /// <param name="defaultEncoding">Encoding used when the stream has no byte order mark. When null, a UTF-8 encoding that throws on invalid bytes is used.</param>
        /// <param name="cancellationToken">Token forwarded to the internal text creation.</param>
        /// <returns>
        /// The text created with the selected encoding, or, when that attempt yields null,
        /// the text created with <see cref="Encoding.Default"/>.
        /// </returns>
        public SourceText CreateText(Stream stream, Encoding defaultEncoding, CancellationToken cancellationToken = default(CancellationToken))
        {
            // Take ownership of the stream before touching it: reading the byte order mark can throw as well,
            // so it must happen inside the using block for the stream to be closed on every path.
            using (stream)
            {
                var encoding = EncodedStringText.TryReadByteOrderMark(stream)
                               ?? defaultEncoding
                               ?? new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

                return CreateTextInternal(stream, encoding, cancellationToken)
                       ?? CreateTextInternal(stream, Encoding.Default, cancellationToken);
            }
        }
        public void Decode_Utf8()
        {
            // UTF-8 configured to throw on invalid byte sequences.
            var strictUtf8 = new UTF8Encoding(false, true);
            var text = "abc def baz aeiouy äëïöüû";
            var bytes = GetBytes(strictUtf8, text);

            // Detect encoding should correctly pick UTF-8
            using (var input = new MemoryStream(bytes))
            {
                var decoded = EncodedStringText.DetectEncodingAndDecode(input);

                Assert.Equal(text, decoded);
                Assert.True(input.CanRead);
            }
        }
Example #17
0
        public void TryReadByteOrderMark()
        {
            // An empty stream has no byte order mark.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[0])));

            // EF BB BF: only the complete three-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef })));
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xbb })));
            var fromEfBbBf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xBB, 0xBF }));
            Assert.Equal("Unicode (UTF-8)", fromEfBbBf.EncodingName);

            // FF FE: only the complete two-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff })));
            var fromFfFe = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff, 0xfe }));
            Assert.Equal("Unicode", fromFfFe.EncodingName);

            // FE FF: only the complete two-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe })));
            var fromFeFf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe, 0xff }));
            Assert.Equal("Unicode (Big-Endian)", fromFeFf.EncodingName);
        }
Example #18
0
        public void TryReadByteOrderMark()
        {
            // An empty stream has no byte order mark.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[0])));

            // EF BB BF: only the complete three-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef })));
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xbb })));
            var fromEfBbBf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xef, 0xBB, 0xBF }));
            Assert.Equal(Encoding.UTF8, fromEfBbBf);

            // FF FE: only the complete two-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff })));
            var fromFfFe = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xff, 0xfe }));
            Assert.Equal(Encoding.Unicode, fromFfFe);

            // FE FF: only the complete two-byte sequence is recognized.
            Assert.Null(EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe })));
            var fromFeFf = EncodedStringText.TryReadByteOrderMark(new MemoryStream(new byte[] { 0xfe, 0xff }));
            Assert.Equal(Encoding.BigEndianUnicode, fromFeFf);
        }
Example #19
0
        /// <summary>
        /// Encodes <paramref name="text"/> (including the encoding's preamble) and reads it back through a memory
        /// stream that covers only the encoded bytes of a larger backing array.
        /// </summary>
        /// <param name="text">Text to encode.</param>
        /// <param name="writeEncoding">Encoding used to produce the bytes.</param>
        /// <param name="readEncodingOpt">Encoding forwarded to the text factory.</param>
        private static EncodedStringText CreateMemoryStreamBasedEncodedText(string text, Encoding writeEncoding, Encoding readEncodingOpt)
        {
            var encoded = writeEncoding.GetBytesWithPreamble(text);
            int contentLength = encoded.Length;

            // For testing purposes the backing array is 10 bytes longer than the content, so that we verify
            // the implementation only uses the part covered by the stream and not the entire array.
            var padded = new byte[contentLength + 10];
            encoded.CopyTo(padded, 0);

            using (var window = new MemoryStream(padded, 0, contentLength, writable: true, publiclyVisible: true))
            {
                return EncodedStringText.Create(window, readEncodingOpt);
            }
        }
Example #20
0
        public void Decode_Utf8()
        {
            // UTF-8 configured to throw on invalid byte sequences.
            var strictUtf8 = new UTF8Encoding(false, true);
            var text = "abc def baz aeiouy äëïöüû";
            var bytes = strictUtf8.GetBytesWithPreamble(text);

            // Detect encoding should correctly pick UTF-8
            using (var input = new MemoryStream(bytes))
            {
                var decoded = EncodedStringText.Create(input);

                Assert.Equal(text, decoded.ToString());
                Assert.Equal(Encoding.UTF8.EncodingName, decoded.Encoding.EncodingName);
                Assert.True(input.CanRead);
            }
        }
Example #21
0
        /// <summary>
        /// Produces a syntax tree by parsing the source file.
        /// </summary>
        /// <param name="path">Path of the source file to parse; must not be null or empty.</param>
        /// <param name="options">Parse options passed through to <c>ParseText</c>; defaults to null.</param>
        /// <param name="cancellationToken">Token passed through to <c>ParseText</c>.</param>
        /// <returns>The syntax tree returned by <c>ParseText</c> for the decoded file contents.</returns>
        /// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
        public static SyntaxTree ParseFile(
            string path,
            CSharpParseOptions options          = null,
            CancellationToken cancellationToken = default(CancellationToken))
        {
            if (string.IsNullOrEmpty(path))
            {
                // The single-argument constructor treats its argument as the message and leaves ParamName unset,
                // so pass a real message and the parameter name separately.
                throw new ArgumentException("The path must not be null or empty.", "path");
            }

            // The file is opened read-only while allowing other handles to read and write it.
            using (var data = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                return ParseText(EncodedStringText.Create(data), options, path, cancellationToken);
            }
        }
Example #22
0
 /// <summary>
 /// Opens a source file and decodes its content.
 /// </summary>
 /// <param name="file">Source file information; its <c>Path</c> is opened.</param>
 /// <param name="diagnostics">Receives one diagnostic when reading fails.</param>
 /// <param name="encoding">Encoding to use or 'null' for autodetect/default</param>
 /// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
 /// <param name="normalizedFilePath">Set to the name reported by the opened file stream when reading succeeds, null otherwise.</param>
 /// <returns>File content or null on failure.</returns>
 internal SourceText ReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, out string normalizedFilePath)
 {
     try
     {
         string openedName;
         SourceText content;

         using (var data = new FileStream(file.Path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
         {
             openedName = data.Name;
             content = EncodedStringText.Create(data, encoding, checksumAlgorithm);
         }

         // Results are published only after the stream has been closed without error.
         normalizedFilePath = openedName;
         return content;
     }
     catch (Exception e)
     {
         // Any failure is reported as a diagnostic instead of being propagated to the caller.
         diagnostics.Add(ToFileReadDiagnostics(e, file));
         normalizedFilePath = null;
         return null;
     }
 }
Example #23
0
        public void IsBinary()
        {
            // Empty text is not binary.
            Assert.False(EncodedStringText.IsBinary(""));

            // Isolated NUL characters are not enough to classify text as binary.
            Assert.False(EncodedStringText.IsBinary("\0abc"));
            Assert.False(EncodedStringText.IsBinary("a\0bc"));
            Assert.False(EncodedStringText.IsBinary("abc\0"));
            Assert.False(EncodedStringText.IsBinary("a\0b\0c"));

            // Two consecutive NUL characters anywhere in the text are.
            Assert.True(EncodedStringText.IsBinary("\0\0abc"));
            Assert.True(EncodedStringText.IsBinary("a\0\0bc"));
            Assert.True(EncodedStringText.IsBinary("abc\0\0"));

            var codePage1252 = Encoding.GetEncoding(1252);

            var highBytes = new byte[] { 0x81, 0x8D, 0x8F, 0x90, 0x9D };
            var highByteText = codePage1252.GetString(highBytes);
            Assert.False(EncodedStringText.IsBinary(highByteText));

            Assert.False(EncodedStringText.IsBinary("abc def baz aeiouy äëïöüû"));

            // Bytes of the System test resource decoded as code page 1252 text must be classified as binary.
            var resourceText = codePage1252.GetString(ProprietaryTestResources.NetFX.v4_0_30319.System);
            Assert.True(EncodedStringText.IsBinary(resourceText));
        }
Example #24
0
        public void ContentEquals()
        {
            var fromString = SourceText.From("foo", Encoding.UTF8);

            // String-based texts compare by content.
            Assert.True(fromString.ContentEquals(SourceText.From("foo", Encoding.UTF8)));
            Assert.False(fromString.ContentEquals(SourceText.From("fooo", Encoding.UTF8)));
            Assert.True(SourceText.From("foo", Encoding.UTF8).ContentEquals(SourceText.From("foo", Encoding.UTF8)));

            // Stream-based texts holding the same characters, one read as UTF-16 and one as UTF-8.
            var utf16Bytes = Encoding.Unicode.GetBytes("foo");
            var utf8Bytes = Encoding.UTF8.GetBytes("foo");
            var fromUtf16Stream = EncodedStringText.Create(new MemoryStream(utf16Bytes), Encoding.Unicode);
            var fromUtf8Stream = EncodedStringText.Create(new MemoryStream(utf8Bytes), Encoding.UTF8);

            Assert.True(fromUtf16Stream.ContentEquals(fromUtf16Stream));
            Assert.True(fromString.ContentEquals(fromUtf16Stream));
            Assert.True(fromUtf16Stream.ContentEquals(fromString));

            Assert.True(fromUtf8Stream.ContentEquals(fromUtf8Stream));
            Assert.True(fromUtf16Stream.ContentEquals(fromUtf8Stream));
            Assert.True(fromUtf8Stream.ContentEquals(fromUtf16Stream));
        }
Example #25
0
        public void ContentEquals()
        {
            var fromString = SourceText.From(HelloWorld, s_utf8);

            // String-based texts compare by content.
            Assert.True(fromString.ContentEquals(SourceText.From(HelloWorld, s_utf8)));
            Assert.False(fromString.ContentEquals(SourceText.From(HelloWorld + "o", s_utf8)));
            Assert.True(SourceText.From(HelloWorld, s_utf8).ContentEquals(SourceText.From(HelloWorld, s_utf8)));

            // Stream-based texts holding the same characters, read with two different encodings.
            var unicodeBytes = s_unicode.GetBytes(HelloWorld);
            var utf8Bytes = s_utf8.GetBytes(HelloWorld);
            var fromUnicodeStream = EncodedStringText.Create(new MemoryStream(unicodeBytes), s_unicode);
            var fromUtf8Stream = EncodedStringText.Create(new MemoryStream(utf8Bytes), s_utf8);

            Assert.True(fromUnicodeStream.ContentEquals(fromUnicodeStream));
            Assert.True(fromString.ContentEquals(fromUnicodeStream));
            Assert.True(fromUnicodeStream.ContentEquals(fromString));

            Assert.True(fromUtf8Stream.ContentEquals(fromUtf8Stream));
            Assert.True(fromUnicodeStream.ContentEquals(fromUtf8Stream));
            Assert.True(fromUtf8Stream.ContentEquals(fromUnicodeStream));
        }
Example #26
0
        /// <summary>
        /// Opens a source file and decodes its content using the encoding and checksum algorithm from <c>Arguments</c>.
        /// </summary>
        /// <param name="file">Source file information; its <c>Path</c> is opened.</param>
        /// <param name="diagnostics">Receives one diagnostic when reading fails.</param>
        /// <param name="normalizedFilePath">Set to the name reported by the opened file when reading succeeds, null otherwise.</param>
        /// <returns>File content or null on failure.</returns>
        internal SourceText TryReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, out string normalizedFilePath)
        {
            var filePath = file.Path;

            try
            {
                string openedName;
                SourceText content;

                using (var data = OpenFileForReadWithSmallBufferOptimization(filePath))
                {
                    openedName = data.Name;

                    // The text is made embeddable only when the file's path is listed in EmbeddedSourcePaths.
                    content = EncodedStringText.Create(data, Arguments.Encoding, Arguments.ChecksumAlgorithm, canBeEmbedded: EmbeddedSourcePaths.Contains(file.Path));
                }

                // Results are published only after the file has been closed without error.
                normalizedFilePath = openedName;
                return content;
            }
            catch (Exception e)
            {
                // Any failure is reported as a diagnostic instead of being propagated to the caller.
                diagnostics.Add(ToFileReadDiagnostics(this.MessageProvider, e, filePath));
                normalizedFilePath = null;
                return null;
            }
        }
Example #27
0
        /// <summary>
        /// Searches the custom debug information attached to <paramref name="handle"/> for an embedded-source
        /// record and decodes the first one that has a blob into a <see cref="SourceText"/>.
        /// </summary>
        /// <param name="handle">Document whose custom debug information is searched.</param>
        /// <returns>
        /// The decoded text, or null when no embedded-source record with a blob exists, the blob is too short to
        /// hold its 4-byte size prefix, the size prefix is negative, or the decompressed length does not match
        /// the size prefix.
        /// </returns>
        private SourceText? TryGetEmbeddedSourceText(DocumentHandle handle)
        {
            foreach (var cdiHandle in _pdbReader.GetCustomDebugInformation(handle))
            {
                var cdi = _pdbReader.GetCustomDebugInformation(cdiHandle);
                if (_pdbReader.GetGuid(cdi.Kind) != PortableCustomDebugInfoKinds.EmbeddedSource)
                {
                    continue;
                }

                var blob = _pdbReader.GetBlobBytes(cdi.Value);
                if (blob is null)
                {
                    continue;
                }

                // The blob starts with an Int32 size prefix. Without this check a truncated blob would make
                // BitConverter.ToInt32 (or the MemoryStream constructor below) throw instead of yielding null.
                if (blob.Length < sizeof(int))
                {
                    return null;
                }

                var uncompressedSize = BitConverter.ToInt32(blob, 0);
                if (uncompressedSize < 0)
                {
                    // A negative size cannot be used as a buffer capacity; treat the record as unreadable.
                    return null;
                }

                var payload = new MemoryStream(blob, sizeof(int), blob.Length - sizeof(int));

                // When the prefix is zero the remaining bytes are decoded directly.
                if (uncompressedSize == 0)
                {
                    using (payload)
                    {
                        return EncodedStringText.Create(payload);
                    }
                }

                // Otherwise the remaining bytes are inflated first. The buffer is disposed on every path,
                // including the length-mismatch return.
                using (var decompressed = new MemoryStream(uncompressedSize))
                {
                    // Disposing the DeflateStream also disposes the payload stream it wraps.
                    using (var deflater = new DeflateStream(payload, CompressionMode.Decompress))
                    {
                        deflater.CopyTo(decompressed);
                    }

                    if (decompressed.Length != uncompressedSize)
                    {
                        return null;
                    }

                    return EncodedStringText.Create(decompressed);
                }
            }

            return null;
        }
Example #28
0
        public void Decode_NonUtf8()
        {
            // Unicode text with extended characters that map to interesting code points in CodePage 1252.
            var expectedText = "abc def baz aeiouy \u20ac\u2019\u00a4\u00b6\u00c9\u00db\u00ed\u00ff";

            // The same text encoded in CodePage 1252 which happens to be an illegal sequence if decoded as Utf-8.
            byte[] bytes =
            {
                0x61, 0x62, 0x63, 0x20, 0x64, 0x65, 0x66, 0x20, 0x62, 0x61, 0x7a, 0x20, 0x61, 0x65, 0x69, 0x6f, 0x75, 0x79, 0x20,
                0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF
            };

            // UTF-8 configured to throw on invalid byte sequences.
            var strictUtf8 = new UTF8Encoding(false, true);

            // bytes should not decode to UTF-8
            using (var input = new MemoryStream(bytes))
            {
                Assert.Throws<DecoderFallbackException>(() =>
                {
                    EncodedStringText.Decode(input, strictUtf8, SourceHashAlgorithm.Sha1);
                });

                // A failed decode must leave the stream usable.
                Assert.True(input.CanRead);
            }

            // Detect encoding should correctly pick CodePage 1252
            using (var input = new MemoryStream(bytes))
            {
                var decoded = EncodedStringText.Create(input);
                Assert.Equal(expectedText, decoded.ToString());

                // Check for a complete Encoding implementation.
                var detected = decoded.Encoding;
                Assert.Equal(1252, detected.CodePage);
                Assert.NotNull(detected.GetEncoder());
                Assert.NotNull(detected.GetDecoder());
                Assert.Equal(2, detected.GetMaxByteCount(1));
                Assert.Equal(1, detected.GetMaxCharCount(1));
                Assert.Equal(expectedText, detected.GetString(bytes));

                Assert.True(input.CanRead);
            }
        }
Example #29
0
 /// <summary>
 /// Opens a source file through <c>PortableShim</c> and decodes its content.
 /// </summary>
 /// <param name="file">Source file information; its <c>Path</c> is opened.</param>
 /// <param name="diagnostics">Receives one diagnostic when reading fails.</param>
 /// <param name="encoding">Encoding to use or 'null' for autodetect/default</param>
 /// <param name="checksumAlgorithm">Hash algorithm used to calculate file checksum.</param>
 /// <param name="normalizedFilePath">Set to the name reported by the opened file stream when reading succeeds, null otherwise.</param>
 /// <returns>File content or null on failure.</returns>
 internal SourceText ReadFileContent(CommandLineSourceFile file, IList<DiagnosticInfo> diagnostics, Encoding encoding, SourceHashAlgorithm checksumAlgorithm, out string normalizedFilePath)
 {
     try
     {
         string openedName;
         SourceText content;

         // PERF: Using a very small buffer size for the FileStream opens up an optimization within EncodedStringText where
         // we read the entire FileStream into a byte array in one shot. For files that are actually smaller than the buffer
         // size, FileStream.Read still allocates the internal buffer.
         using (var data = PortableShim.FileStream.Create(file.Path, PortableShim.FileMode.Open, PortableShim.FileAccess.Read, PortableShim.FileShare.ReadWrite, bufferSize: 1, options: PortableShim.FileOptions.None))
         {
             openedName = (string)PortableShim.FileStream.Name.GetValue(data);
             content = EncodedStringText.Create(data, encoding, checksumAlgorithm);
         }

         // Results are published only after the stream has been closed without error.
         normalizedFilePath = openedName;
         return content;
     }
     catch (Exception e)
     {
         // Any failure is reported as a diagnostic instead of being propagated to the caller.
         diagnostics.Add(ToFileReadDiagnostics(e, file));
         normalizedFilePath = null;
         return null;
     }
 }
Example #30
0
        public void Decode_NonUtf8()
        {
            // UTF-8 configured to throw on invalid byte sequences.
            var strictUtf8 = new UTF8Encoding(false, true);

            byte[] highBytes = { 0x80, 0x92, 0xA4, 0xB6, 0xC9, 0xDB, 0xED, 0xFF };
            var text = "abc def baz aeiouy " + Encoding.Default.GetString(highBytes);
            var bytes = GetBytes(Encoding.Default, text);

            // Encoding.Default should not decode to UTF-8
            using (var input = new MemoryStream(bytes))
            {
                Assert.Throws<DecoderFallbackException>(() =>
                {
                    EncodedStringText.Decode(input, strictUtf8);
                });

                // A failed decode must leave the stream usable.
                Assert.True(input.CanRead);
            }

            // Detect encoding should correctly pick Encoding.Default
            using (var input = new MemoryStream(bytes))
            {
                var decoded = EncodedStringText.DetectEncodingAndDecode(input);

                Assert.Equal(text, decoded);
                Assert.True(input.CanRead);
            }
        }