// Exercises EncodingExtensions.GetChars(Encoding, ReadOnlySpan<byte>, IBufferWriter<char>)
// with a tiny input and with a multi-megabyte input that ends in a truncated sequence.
public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter()
{
    // Small input: intended to go through the one-shot code path.
    ArrayBufferWriter<char> smallWriter = new ArrayBufferWriter<char>();
    ReadOnlySpan<byte> smallInput = Encoding.UTF8.GetBytes("Hello");

    long smallCount = EncodingExtensions.GetChars(Encoding.UTF8, smallInput, smallWriter);

    Assert.Equal(5, smallCount);
    Assert.Equal("Hello", smallWriter.WrittenSpan.ToString());

    // Large input: intended to go through the chunked path.
    // U+1234 is a 3-byte UTF-8 sequence, so the internal chunking logic will split it
    // in several places. That checks the 'flush' parameter is flowed through correctly.
    // A lone 0xE0 lead byte is appended so the input ends with an incomplete sequence.
    string repeated = new string('\u1234', 5_000_000);
    ReadOnlySpan<byte> largeInput = Encoding.UTF8.GetBytes(repeated).Concat(new byte[] { 0xE0 }).ToArray();
    ArrayBufferWriter<char> largeWriter = new ArrayBufferWriter<char>();

    long largeCount = EncodingExtensions.GetChars(Encoding.UTF8, largeInput, largeWriter);

    // 5 MM chars of data plus 1 replacement char for the trailing 0xE0.
    Assert.Equal(5_000_001, largeCount);

    // Confirm the decoded text itself, not just its length.
    Assert.Equal(repeated + '\ufffd', largeWriter.WrittenSpan.ToString());
}
// Exercises EncodingExtensions.GetBytes(Encoding, ReadOnlySequence<char>, Span<byte>)
// for single-segment and multi-segment sequences.
public static void GetBytes_Encoding_ReadOnlySequence_Span()
{
    Span<byte> scratch = stackalloc byte[32];

    // Single-segment code path.
    ReadOnlySequence<char> singleSegment = new ReadOnlySequence<char>("Hello!".ToCharArray());
    byte[] expectedSingle = Encoding.UTF8.GetBytes("Hello!");
    int singleCount = EncodingExtensions.GetBytes(Encoding.UTF8, singleSegment, scratch);
    Assert.Equal(expectedSingle, scratch.Slice(0, singleCount).ToArray());

    // Multi-segment code path.
    // Surrogate pairs are deliberately split across segments to test the flushing mechanism.
    ReadOnlySequence<char> multiSegment = SequenceFactory.Create(
        new char[] { '\u0020' },            // U+0020
        new char[] { '\u0061', '\u0080' },  // U+0061 and U+0080
        new char[] { '\ud800' },            // high half of U+10000
        new char[] { },                     // empty segment, must be tolerated
        new char[] { '\udc00', '\udbff' },  // low half of U+10000, high half of U+10FFFF
        new char[] { '\udfff' },            // low half of U+10FFFF
        new char[] { '\ud800' });           // dangling high surrogate (should be replaced)

    byte[] expectedMulti = Encoding.UTF8.GetBytes("\u0020\u0061\u0080\U00010000\U0010FFFF\ufffd");
    int multiCount = EncodingExtensions.GetBytes(Encoding.UTF8, multiSegment, scratch);
    Assert.Equal(expectedMulti, scratch.Slice(0, multiCount).ToArray());
}
// Verifies that GetChars(Encoding, ReadOnlySpan<byte>, IBufferWriter<char>) rejects
// a null encoding and a null writer with ArgumentNullException.
public static void GetChars_Encoding_ReadOnlySpan_IBufferWriter_ParamChecks()
{
    Encoding nullEncoding = null;
    IBufferWriter<char> nullWriter = null;
    IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars(nullEncoding, ReadOnlySpan<byte>.Empty, validWriter));

    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetChars(Encoding.UTF8, ReadOnlySpan<byte>.Empty, nullWriter));
}
// Exercises EncodingExtensions.Convert(Encoder, ReadOnlySpan<char>, IBufferWriter<byte>, ...)
// across three consecutive calls that share one stateful encoder and one writer.
public static void Convert_Encoder_ReadOnlySpan_IBufferWriter()
{
    Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
    ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
    string xs = new string('x', 20_000_000);

    long bytesUsed;
    bool completed;

    // Phase 1: small input, no flush, nothing left over.
    ReadOnlySpan<char> input = "Hello";
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: false, out bytesUsed, out completed);
    Assert.Equal(5, bytesUsed);
    Assert.True(completed);

    // Phase 2: large input, no flush. The trailing high surrogate stays pending in the encoder.
    input = xs + '\ud800';
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: false, out bytesUsed, out completed);
    Assert.Equal(20_000_000, bytesUsed);
    Assert.False(completed);

    // Phase 3: large input with flush. The leading low surrogate completes the pending pair,
    // and the trailing lone high surrogate should be replaced.
    input = '\udc00' + xs + '\ud800';
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: true, out bytesUsed, out completed);
    Assert.Equal(20_000_007, bytesUsed); // 4 for supplementary at beginning, 3 for replacement at end
    Assert.True(completed);

    // Confirm every byte written. SequenceEqual is used instead of Assert.Equal for perf.
    byte[] expected = Encoding.UTF8.GetBytes("Hello" + xs + "\U00010000" + xs + '\ufffd');
    Assert.True(expected.AsSpan().SequenceEqual(output.WrittenSpan));
}
// Exercises EncodingExtensions.Convert(Decoder, ReadOnlySpan<byte>, IBufferWriter<char>, ...)
// across three consecutive calls that share one stateful decoder and one writer.
public static void Convert_Decoder_ReadOnlySpan_IBufferWriter()
{
    Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
    ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();
    string xs = new string('x', 20_000_000);
    byte[] xsUtf8 = Encoding.UTF8.GetBytes(xs);

    long charsUsed;
    bool completed;

    // Phase 1: small input, no flush, nothing left over.
    ReadOnlySpan<byte> input = Encoding.UTF8.GetBytes("Hello");
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: false, out charsUsed, out completed);
    Assert.Equal(5, charsUsed);
    Assert.True(completed);

    // Phase 2: large input, no flush. The trailing partial sequence E0 A0 stays pending.
    input = xsUtf8.Concat(new byte[] { 0xE0, 0xA0 }).ToArray();
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: false, out charsUsed, out completed);
    Assert.Equal(20_000_000, charsUsed);
    Assert.False(completed);

    // Phase 3: large input with flush. The leading 0x80 completes the pending sequence (U+0800),
    // and the trailing lone 0xE0 should be replaced.
    input = new byte[] { 0x80 }.Concat(xsUtf8).Concat(new byte[] { 0xE0 }).ToArray();
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: true, out charsUsed, out completed);
    Assert.Equal(20_000_002, charsUsed); // 1 for leftover at beginning, 1 for replacement at end
    Assert.True(completed);

    // Confirm every char written.
    Assert.Equal("Hello" + xs + '\u0800' + xs + '\ufffd', output.WrittenSpan.ToString());
}
/// <summary>
/// Reads the whole response body into memory and decodes it to a string.
/// </summary>
/// <param name="site">Site settings; when <c>EncodingName</c> is set, <c>site.Encoding</c> is used to decode.</param>
/// <param name="response">The HTTP response whose body stream is read.</param>
/// <returns>The decoded body, or <see cref="string.Empty"/> when the response has no stream.</returns>
private string ReadContent(Site site, HttpWebResponse response)
{
    byte[] contentBytes;

    // The buffer stream is disposed on every path, including the early return below.
    using (MemoryStream memoryStream = new MemoryStream(0x1000))
    {
        using (Stream responseStream = response.GetResponseStream())
        {
            if (responseStream == null)
            {
                return string.Empty;
            }

            // Same 4 KB chunking as a manual read/write loop, using the framework copy.
            responseStream.CopyTo(memoryStream, 0x1000);
        }

        contentBytes = memoryStream.StreamToBytes();
    }

    contentBytes = PreventCutOff(contentBytes);

    if (string.IsNullOrEmpty(site.EncodingName))
    {
        // No encoding configured: pick one from the response charset and the content bytes.
        var charSet = response.CharacterSet;
        Encoding htmlCharset = EncodingExtensions.GetEncoding(charSet, contentBytes);
        return htmlCharset.GetString(contentBytes, 0, contentBytes.Length);
    }

    return site.Encoding.GetString(contentBytes, 0, contentBytes.Length);
}
/// <summary>
/// Reads the entire file as text and passes it to <c>LoadFromText</c>.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>Whatever <c>LoadFromText</c> returns for the file's content.</returns>
public bool LoadFromFile(string filename)
{
    // The encoding is chosen by the project helper from the file name/path.
    var fileEncoding = EncodingExtensions.GetEncoding(filename);

    using (StreamReader reader = new StreamReader(filename, fileEncoding))
    {
        string text = reader.ReadToEnd();
        return LoadFromText(text);
    }
}
// Verifies that Convert(Encoder, ReadOnlySequence<char>, IBufferWriter<byte>, ...) rejects
// a null encoder and a null writer with ArgumentNullException.
public static void Convert_Encoder_ReadOnlySequence_IBufferWriter_ParamChecks()
{
    Encoder validEncoder = Encoding.UTF8.GetEncoder();
    Encoder nullEncoder = null;
    IBufferWriter<byte> validWriter = new ArrayBufferWriter<byte>();
    IBufferWriter<byte> nullWriter = null;

    Assert.Throws<ArgumentNullException>(
        "encoder",
        () => EncodingExtensions.Convert(nullEncoder, ReadOnlySequence<char>.Empty, validWriter, true, out _, out _));

    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.Convert(validEncoder, ReadOnlySequence<char>.Empty, nullWriter, true, out _, out _));
}
// Verifies that Convert(Decoder, ReadOnlySpan<byte>, IBufferWriter<char>, ...) rejects
// a null decoder and a null writer with ArgumentNullException.
public static void Convert_Decoder_ReadOnlySpan_IBufferWriter_ParamChecks()
{
    Decoder validDecoder = Encoding.UTF8.GetDecoder();
    Decoder nullDecoder = null;
    IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();
    IBufferWriter<char> nullWriter = null;

    Assert.Throws<ArgumentNullException>(
        "decoder",
        () => EncodingExtensions.Convert(nullDecoder, ReadOnlySpan<byte>.Empty, validWriter, true, out _, out _));

    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.Convert(validDecoder, ReadOnlySpan<byte>.Empty, nullWriter, true, out _, out _));
}
// Verifies that GetChars(Encoding, ReadOnlySequence<byte>, IBufferWriter<char>) rejects
// a null encoding and a null writer with ArgumentNullException.
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_ParamChecks()
{
    ReadOnlySequence<byte> emptySequence = new ReadOnlySequence<byte>(new byte[0]);
    IBufferWriter<char> validWriter = new ArrayBufferWriter<char>();
    IBufferWriter<char> nullWriter = null;
    Encoding nullEncoding = null;

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars(nullEncoding, emptySequence, validWriter));

    Assert.Throws<ArgumentNullException>(
        "writer",
        () => EncodingExtensions.GetChars(Encoding.UTF8, emptySequence, nullWriter));
}
// Encodes a single-segment char sequence into a buffer writer and checks both the
// reported byte count and the bytes written.
public static void GetBytes_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
{
    const string text = "Hello";
    ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();
    ReadOnlySequence<char> input = new ReadOnlySequence<char>(text.ToCharArray());

    long reported = EncodingExtensions.GetBytes(Encoding.UTF8, input, output);

    Assert.Equal(5, reported);
    Assert.Equal(Encoding.UTF8.GetBytes(text), output.WrittenSpan.ToArray());
}
// Decodes a single-segment byte sequence into a buffer writer and checks both the
// reported char count and the chars written.
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_SingleSegment()
{
    const string text = "Hello";
    ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();
    ReadOnlySequence<byte> input = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes(text));

    long reported = EncodingExtensions.GetChars(Encoding.UTF8, input, output);

    Assert.Equal(5, reported);
    Assert.Equal(text, output.WrittenSpan.ToString());
}
/// <summary>
/// Decodes a slice of a byte array using the encoding the project helper selects for that slice.
/// </summary>
/// <param name="bytes">Source array.</param>
/// <param name="index">Offset of the first byte of the slice within <paramref name="bytes"/>.</param>
/// <param name="count">Number of bytes in the slice.</param>
/// <returns>The slice decoded to a string.</returns>
public string Chardet(byte[] bytes, int index, int count)
{
    // Work on a private copy so that only the requested slice is inspected and decoded.
    byte[] slice = new byte[count];
    Array.Copy(bytes, index, slice, 0, count);

    return EncodingExtensions.GetEncoding(slice).GetString(slice);
}
[OuterLoop] // this test takes ~10 seconds on modern hardware since it operates over GBs of data
public static void GetChars_Encoding_ReadOnlySequence_IBufferWriter_LargeMultiSegment()
{
    const int repetitions = 1500;

    // Input: the AllScalarsAsUtf8 block repeated 'repetitions' times (~6.5bn bytes of UTF-8).
    ReadOnlySequence<byte> input = GetLargeRepeatingReadOnlySequence<byte>(AllScalarsAsUtf8, repetitions);

    // Output sink: this writer validates the data as it is written to it.
    RepeatingValidatingBufferWriter<char> sink = new RepeatingValidatingBufferWriter<char>(AllScalarsAsUtf16);

    long expectedTotal = repetitions * (long)AllScalarsAsUtf16.Length;
    long reportedTotal = EncodingExtensions.GetChars(Encoding.UTF8, input, sink);

    Assert.Equal(expectedTotal, reportedTotal);
    Assert.Equal(expectedTotal, sink.TotalElementsWritten);
}
/// <summary>
/// Reads the response body as bytes and decodes it to a string.
/// </summary>
/// <param name="site">Site settings; a non-blank <c>EncodingName</c> means <c>site.Encoding</c> is used.</param>
/// <param name="response">The HTTP response to read.</param>
/// <returns>The decoded response body.</returns>
protected virtual string ReadContent(Site site, HttpResponseMessage response)
{
    // Blocks until the body has been fully read.
    byte[] payload = PreventCutOff(response.Content.ReadAsByteArrayAsync().Result);

    if (!string.IsNullOrWhiteSpace(site.EncodingName))
    {
        return site.Encoding.GetString(payload, 0, payload.Length);
    }

    // No encoding configured: pick one from the Content-Type charset (may be null) and the bytes.
    var headerCharSet = response.Content.Headers.ContentType?.CharSet;
    Encoding detected = EncodingExtensions.GetEncoding(headerCharSet, payload);
    return detected.GetString(payload, 0, payload.Length);
}
/// <summary>
/// Builds an HTML page for a Markdown file by HTML-encoding the file's text and inserting it
/// into the <c>md2html</c> resource template at the <c>{{content}}</c> placeholder.
/// </summary>
/// <param name="path">Path of the Markdown file.</param>
/// <returns>The template with the placeholder replaced by the encoded Markdown text.</returns>
private string GenerateMarkdownHtml(string path)
{
    byte[] raw = File.ReadAllBytes(path);

    // The project helper chooses the encoding from the path and the file length.
    string markdown = EncodingExtensions.GetEncoding(path, raw.Length).GetString(raw);
    string escaped = WebUtility.HtmlEncode(markdown);

    return Resources.md2html.Replace("{{content}}", escaped);
}
/// <summary>
/// Reads the response body as bytes and decodes it to a string.
/// </summary>
/// <param name="response">The HTTP response to read.</param>
/// <param name="requestTask">Request settings; a non-null <c>Encoding</c> is used to decode.</param>
/// <returns>The decoded response body.</returns>
private string ReadContent(HttpResponseMessage response, HttpRequestTask requestTask)
{
    // Blocks until the body has been fully read.
    byte[] payload = PreventCutOff(response.Content.ReadAsByteArrayAsync().Result);

    if (requestTask.Encoding != null)
    {
        return requestTask.Encoding.GetString(payload, 0, payload.Length);
    }

    // No encoding supplied: pick one from the Content-Type charset (may be null) and the bytes.
    var headerCharSet = response.Content.Headers.ContentType?.CharSet;
    var detected = EncodingExtensions.GetEncoding(headerCharSet, payload);
    return detected.GetString(payload, 0, payload.Length);
}
/// <summary>
/// Reads the response body as bytes and decodes it to a string.
/// </summary>
/// <param name="site">Site settings; a non-empty <c>EncodingName</c> means <c>site.Encoding</c> is used.</param>
/// <param name="response">The HTTP response to read.</param>
/// <returns>The decoded response body.</returns>
private string ReadContent(Site site, HttpResponseMessage response)
{
    // Blocks until the body has been fully read.
    byte[] payload = PreventCutOff(response.Content.ReadAsByteArrayAsync().Result);

    if (!string.IsNullOrEmpty(site.EncodingName))
    {
        return site.Encoding.GetString(payload, 0, payload.Length);
    }

    // No encoding configured: use the Content-Type charset when the header is present.
    string headerCharSet = null;
    if (response.Content.Headers.ContentType != null)
    {
        headerCharSet = response.Content.Headers.ContentType.CharSet;
    }

    Encoding detected = EncodingExtensions.GetEncoding(headerCharSet, payload);
    return detected.GetString(payload, 0, payload.Length);
}
// Exercises EncodingExtensions.GetBytes(Encoding, ReadOnlySpan<char>, IBufferWriter<byte>)
// with a tiny input and with a multi-megabyte input that ends in a lone lead surrogate.
public static void GetBytes_Encoding_ReadOnlySpan_IBufferWriter()
{
    // Small input: intended to go through the one-shot code path.
    ArrayBufferWriter<byte> smallWriter = new ArrayBufferWriter<byte>();
    ReadOnlySpan<char> smallInput = "Hello";

    long smallCount = EncodingExtensions.GetBytes(Encoding.UTF8, smallInput, smallWriter);

    Assert.Equal(5, smallCount);
    Assert.Equal(Encoding.UTF8.GetBytes("Hello"), smallWriter.WrittenSpan.ToArray());

    // Large input: intended to go through the chunked path.
    // 1-char and 2-char sequences alternate so the internal chunking logic splits the input
    // in several places. That checks the 'flush' parameter is flowed through correctly.
    string largeString = string.Create(5_000_000, (object)null, (span, _) =>
    {
        int pos = 0;
        for (; span.Length - pos >= 3; pos += 3)
        {
            span[pos] = '\u00EA';     // U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
            span[pos + 1] = '\uD83D'; // U+1F405 TIGER (high surrogate)
            span[pos + 2] = '\uDC05'; // U+1F405 TIGER (low surrogate)
        }

        // Two chars remain after the last full 3-char group.
        Assert.Equal(2, span.Length - pos);
        span[pos] = 'x';
        span[pos + 1] = 'y';
    });

    ArrayBufferWriter<byte> largeWriter = new ArrayBufferWriter<byte>();
    ReadOnlySpan<char> largeInput = largeString + '\uD800'; // standalone lead surrogate at end of input, testing replacement

    long largeCount = EncodingExtensions.GetBytes(Encoding.UTF8, largeInput, largeWriter);

    Assert.Equal(10_000_001, largeCount); // 9,999,998 for data + 3 for replacement char at end

    // Confirm every byte written.
    byte[] expected = Encoding.UTF8.GetBytes(largeString + "\ufffd");
    Assert.True(expected.AsSpan().SequenceEqual(largeWriter.WrittenSpan));
}
/// <summary>
/// Looks up an active user by user name and password and, on success, records the login time.
/// </summary>
/// <param name="userName">User name to match.</param>
/// <param name="passWord">Password as entered by the user (plaintext).</param>
/// <returns>
/// A result whose <c>Target</c> is the user's DTO on success, or a result carrying
/// <c>ErrorCode.LogInFail</c> when no user matches or an exception occurs.
/// </returns>
public ServiceResult<UserDto> AuthenticateUser(string userName, string passWord)
{
    var result = new ServiceResult<UserDto>();
    try
    {
        var encodedMd5Password = EncodingExtensions.EncodeMD5(passWord);
        using (var session = Session)
        {
            // Compare against the encoded password rather than the raw input: the encoded value
            // was previously computed and then ignored.
            // NOTE(review): assumes stored passwords are produced by the same EncodeMD5 helper —
            // confirm against the code that creates users. MD5 is also a weak password hash.
            var user = session.QueryOver<User>()
                .Where(u => u.UserName == userName && u.PassWord == encodedMd5Password && u.Active == 1)
                .SingleOrDefault();

            if (user == null)
            {
                return new ServiceResult<UserDto>
                {
                    Errors = new List<Error> { new Error { Code = ErrorCode.LogInFail } }
                };
            }

            // Update last login for user
            user.LastLogin = DateTime.Now;
            session.SaveOrUpdate(user);
            session.Flush();

            var userDto = user.FromEntityToDto();
            result.Target = userDto;
        }
    }
    catch (Exception ex)
    {
        // Any failure (including a non-unique match) is reported as a login failure.
        result.AddError(ErrorCode.LogInFail, ex);
    }

    return result;
}
// Exercises EncodingExtensions.GetString(Encoding, ReadOnlySequence<byte>)
// for single-segment and multi-segment sequences.
public static void GetString_Encoding_ReadOnlySequence()
{
    // Single-segment code path.
    ReadOnlySequence<byte> singleSegment = new ReadOnlySequence<byte>(Encoding.UTF8.GetBytes("Hello!"));
    string singleResult = EncodingExtensions.GetString(Encoding.UTF8, singleSegment);
    Assert.Equal("Hello!", singleResult);

    // Multi-segment code path.
    // Multi-byte sequences are deliberately split across segments to test the flushing mechanism.
    ReadOnlySequence<byte> multiSegment = SequenceFactory.Create(
        new byte[] { 0x20 },                    // U+0020
        new byte[] { 0x61, 0xC2 },              // U+0061, then first byte of U+0080
        new byte[] { 0x80, 0xED },              // rest of U+0080, then first byte of U+D7FF
        new byte[] { },                         // empty segment, must be tolerated
        new byte[] { 0x9F, 0xBF, 0xF4, 0x80 },  // rest of U+D7FF, then first half of U+100000
        new byte[] { 0x80, 0x80 },              // rest of U+100000
        new byte[] { 0xC2 });                   // dangling lead byte (should be replaced)

    string multiResult = EncodingExtensions.GetString(Encoding.UTF8, multiSegment);
    Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", multiResult);
}
// Exercises EncodingExtensions.Convert(Decoder, ReadOnlySequence<byte>, IBufferWriter<char>, ...)
// across three consecutive calls that share one stateful decoder and one writer.
public static void Convert_Decoder_ReadOnlySequence_IBufferWriter()
{
    Decoder utf8Decoder = Encoding.UTF8.GetDecoder();
    ArrayBufferWriter<char> output = new ArrayBufferWriter<char>();

    long charsUsed;
    bool completed;

    // Phase 1: no flush, nothing left over.
    ReadOnlySequence<byte> input = SequenceFactory.Create(
        new byte[] { 0x20 },                    // U+0020
        new byte[] { 0x61, 0xC2 },              // U+0061, then first byte of U+0080
        new byte[] { 0x80, 0xED, 0x9F, 0xBF }); // rest of U+0080, then U+D7FF
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: false, out charsUsed, out completed);
    Assert.Equal(4, charsUsed);
    Assert.True(completed);

    // Phase 2: no flush. Only the first half of U+100000 is supplied, so it stays pending.
    input = SequenceFactory.Create(
        new byte[] { 0xF4, 0x80 });
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: false, out charsUsed, out completed);
    Assert.Equal(0, charsUsed);
    Assert.False(completed);

    // Phase 3: flush. The pending scalar is completed and the dangling 0xC2 should be replaced.
    input = SequenceFactory.Create(
        new byte[] { 0x80, 0x80 },  // rest of U+100000
        new byte[] { 0xC2 });       // dangling lead byte
    EncodingExtensions.Convert(utf8Decoder, input, output, flush: true, out charsUsed, out completed);
    Assert.Equal(3, charsUsed);
    Assert.True(completed);

    // Confirm every char written.
    Assert.Equal("\u0020\u0061\u0080\ud7ff\U00100000\ufffd", output.WrittenSpan.ToString());
}
// Exercises EncodingExtensions.Convert(Encoder, ReadOnlySequence<char>, IBufferWriter<byte>, ...)
// across three consecutive calls that share one stateful encoder and one writer.
public static void Convert_Encoder_ReadOnlySequence_IBufferWriter()
{
    Encoder utf8Encoder = Encoding.UTF8.GetEncoder();
    ArrayBufferWriter<byte> output = new ArrayBufferWriter<byte>();

    long bytesUsed;
    bool completed;

    // Phase 1: no flush, nothing left over.
    ReadOnlySequence<char> input = SequenceFactory.Create(
        new char[] { '\u0020' },    // U+0020
        new char[] { '\ud7ff' });   // U+D7FF
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: false, out bytesUsed, out completed);
    Assert.Equal(4, bytesUsed);
    Assert.True(completed);

    // Phase 2: no flush. Only the high surrogate of U+100000 is supplied, so it stays pending.
    input = SequenceFactory.Create(
        new char[] { '\udbc0' });
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: false, out bytesUsed, out completed);
    Assert.Equal(0, bytesUsed);
    Assert.False(completed);

    // Phase 3: flush. The pending pair is completed and the dangling high surrogate should be replaced.
    input = SequenceFactory.Create(
        new char[] { '\udc00' },    // low surrogate of U+100000
        new char[] { '\ud800' });   // dangling high surrogate
    EncodingExtensions.Convert(utf8Encoder, input, output, flush: true, out bytesUsed, out completed);
    Assert.Equal(7, bytesUsed);
    Assert.True(completed);

    // Confirm every byte written.
    byte[] expected = Encoding.UTF8.GetBytes("\u0020\ud7ff\U00100000\ufffd");
    Assert.Equal(expected, output.WrittenSpan.ToArray());
}
/// <summary>
/// Writes a test string to a memory stream in every encoding the system reports and checks
/// that `ReadDelimitedDataFrom` reads the data back from that stream.
/// </summary>
public void TestReadDelimitedDataFrom()
{
    string testString = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890!@#$%^&*()_+-=";

    foreach (var encodingInfo in System.Text.Encoding.GetEncodings())
    {
        using (var ms = new System.IO.MemoryStream())
        {
            var encoding = encodingInfo.GetEncoding();

            System.Console.WriteLine("Testing: " + encoding.EncodingName);

            // Writer over the same stream with a buffer size of 1; the stream is left open.
            using (var streamWriter = new System.IO.StreamWriter(ms, encoding, 1, true))
            {
                // Length of the string's binary representation in this encoding.
                int encodedDataLength = encoding.GetBytes(testString).Length;

                // Write the text, push it into the stream, then rewind for reading.
                streamWriter.Write(testString);
                streamWriter.Flush();
                ms.Position = 0;

                string actual;
                int read;

                // Read using the binary length rather than the string length.
                // null is passed as the third argument; a true result is treated as failure.
                bool reported = EncodingExtensions.ReadDelimitedDataFrom(encoding, ms, null, encodedDataLength, out actual, out read);
                if (reported)
                {
                    throw new System.Exception("ReadDelimitedDataFrom failed.");
                }

                // The stream must not have advanced past the data plus the encoding's preamble.
                long maxPosition = encodedDataLength + encoding.GetPreamble().Length;
                if (ms.Position > maxPosition)
                {
                    throw new System.Exception("Stream.Position is not correct.");
                }

                // Compare the round-tripped text with what was read; only a result above 1 is an error.
                string roundTripped = encoding.GetString(encoding.GetBytes(testString));
                int difference = string.Compare(roundTripped, actual);
                if (difference > 1)
                {
                    throw new System.Exception("string data is incorrect.");
                }

                System.Console.WriteLine(actual);
            }
        }
    }
}
/// <summary>
/// Reads a CSV file into <c>Rows</c>, prefixing each row with a right-aligned row number.
/// Column bindings are set up from the first record. After <c>limit</c> rows a row of "..."
/// is appended and reading stops.
/// </summary>
/// <param name="path">Path of the CSV file to load.</param>
public void LoadFile(string path)
{
    const int limit = 10000;
    bool columnsBound = false;

    FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    using (var sr = new StreamReader(file, EncodingExtensions.GetEncoding(path)))
    {
        CsvConfiguration conf = new CsvConfiguration(CultureInfo.CurrentCulture);

        using (var csv = new CsvReader(sr, conf))
        {
            int rowNumber = 0;

            while (csv.Read())
            {
                // Collect fields until the reader reports there is no field at this index.
                List<string> fields = new List<string>();
                int fieldIndex = 0;
                while (csv.TryGetField<string>(fieldIndex, out string field))
                {
                    fields.Add(field);
                    fieldIndex++;
                }

                if (columnsBound)
                {
                    // Pad short records with empty strings up to the grid's column count.
                    while (fieldIndex < dataGrid.Columns.Count)
                    {
                        fields.Add("");
                        fieldIndex++;
                    }
                }
                else
                {
                    // First record: one extra column for the row-number prefix.
                    SetupColumnBinding(fields.Count + 1);
                    columnsBound = true;
                }

                rowNumber++;
                var row = Concat(new[] { $"{rowNumber}".PadLeft(6) }, fields.ToArray());

                if (rowNumber > limit)
                {
                    Rows.Add(Enumerable.Repeat("...", row.Length).ToArray());
                    break;
                }

                Rows.Add(row);
            }
        }
    }
}
// Verifies that GetBytes(Encoding, ReadOnlySequence<char>) rejects a null encoding
// with ArgumentNullException.
public static void GetBytes_Encoding_ReadOnlySequence_ParamChecks()
{
    Encoding nullEncoding = null;
    ReadOnlySequence<char> emptySequence = new ReadOnlySequence<char>(new char[0]);

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetBytes(nullEncoding, emptySequence));
}
// Verifies that GetChars(Encoding, ReadOnlySequence<byte>, Span<char>) rejects a null encoding
// with ArgumentNullException.
public static void GetChars_Encoding_ReadOnlySequence_Span_ParamChecks()
{
    Encoding nullEncoding = null;
    ReadOnlySequence<byte> emptySequence = new ReadOnlySequence<byte>(new byte[0]);

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetChars(nullEncoding, emptySequence, Span<char>.Empty));
}
// Verifies that GetString(Encoding, ReadOnlySequence<byte>) rejects a null encoding
// with ArgumentNullException.
public static void GetString_Encoding_ReadOnlySequence_ParamChecks()
{
    Encoding nullEncoding = null;
    ReadOnlySequence<byte> emptySequence = new ReadOnlySequence<byte>(new byte[0]);

    Assert.Throws<ArgumentNullException>(
        "encoding",
        () => EncodingExtensions.GetString(nullEncoding, emptySequence));
}
// Checks that an escaped \uXXXX sequence in the input is decoded to the expected character.
[TestCase(@"\u12af", @"ኯ")] // valid hex value
public void DecodeEncodedNonAsciiCharacters(string value, string expected)
{
    Assert.AreEqual(expected, EncodingExtensions.DecodeEncodedNonAsciiCharacters(value));
}
/// <summary>
/// Loads up to roughly 5 MB of a file on a background task, decodes it, and then publishes the
/// resulting document, encoding and syntax highlighting through the dispatcher.
/// </summary>
/// <param name="path">Path of the file to load.</param>
private void LoadFileAsync(string path)
{
    Task.Run(() =>
    {
        const int maxLength = 5 * 1024 * 1024;
        const int maxHighlightingLength = (int)(0.5 * 1024 * 1024);

        byte[] bufferCopy;
        bool fileTooLong;

        // The in-memory buffer is disposed on every exit path, including the early return.
        using (var buffer = new MemoryStream())
        {
            using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            {
                fileTooLong = fs.Length > maxLength;

                // One scratch buffer reused for every read instead of a fresh 8 KB array per iteration.
                var chunk = new byte[8192];
                while (fs.Position < fs.Length && buffer.Length < maxLength)
                {
                    if (_disposed)
                    {
                        break;
                    }

                    int len = fs.Read(chunk, 0, chunk.Length);
                    if (len == 0)
                    {
                        // Nothing more could be read; stop rather than spin on the same position.
                        break;
                    }

                    buffer.Write(chunk, 0, len);
                }
            }

            if (_disposed)
            {
                return;
            }

            if (fileTooLong)
            {
                _context.Title += " (0 ~ 5MB)";
            }

            bufferCopy = buffer.ToArray();
        }

        // Use NChardet to resolve most encoding-detection problems.
        var encoding = EncodingExtensions.GetEncoding(path, bufferCopy.Length);

        var doc = new TextDocument(encoding.GetString(bufferCopy));
        doc.SetOwnerThread(Dispatcher.Thread);

        if (_disposed)
        {
            return;
        }

        Dispatcher.BeginInvoke(new Action(() =>
        {
            Encoding = encoding;
            // Highlighting is skipped for content larger than 0.5 MB.
            SyntaxHighlighting = bufferCopy.Length > maxHighlightingLength
                ? null
                : HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path));
            Document = doc;
            _context.IsBusy = false;
        }), DispatcherPriority.Render);
    });
}