/// <summary>
/// Creates a text document for the file at <paramref name="filePath"/>, decoding its contents with the
/// caller-supplied <paramref name="encoding"/>.
/// </summary>
/// <param name="filePath">Path of the file to open and read.</param>
/// <param name="contentType">Content type passed to the buffer factory for the new text buffer.</param>
/// <param name="encoding">
/// Encoding used to decode the file. Byte order marks in the file are not used to override it.
/// </param>
/// <param name="characterSubstitutionsOccurred">
/// Receives the fallback detector's <c>FallbackOccurred</c> value once the file has been read.
/// </param>
/// <returns>The created document, returned after <c>RaiseTextDocumentCreated</c> has been called for it.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/>, <paramref name="contentType"/> or <paramref name="encoding"/> is null.
/// </exception>
public ITextDocument CreateAndLoadTextDocument(string filePath, IContentType contentType, Encoding encoding, out bool characterSubstitutionsOccurred)
{
    if (filePath == null)
    {
        throw new ArgumentNullException(nameof(filePath));
    }

    if (contentType == null)
    {
        throw new ArgumentNullException(nameof(contentType));
    }

    if (encoding == null)
    {
        throw new ArgumentNullException(nameof(encoding));
    }

    // The fallback detector is installed on a clone of the encoding rather than on the caller's instance.
    // It wraps the encoding's own decoder fallback so that we can observe whether a fallback happened.
    var fallbackDetector = new FallbackDetector(encoding.DecoderFallback);
    var modifiedEncoding = (Encoding)encoding.Clone();
    modifiedEncoding.DecoderFallback = fallbackDetector;

    ITextBuffer buffer;
    DateTime lastModified;
    long fileSize;
    using (Stream stream = OpenFile(filePath, out lastModified, out fileSize))
    {
        // Caller knows best, so don't use byte order marks.
        using (StreamReader reader = new StreamReader(stream, modifiedEncoding, detectEncodingFromByteOrderMarks: false))
        {
            System.Diagnostics.Debug.Assert(encoding.CodePage == reader.CurrentEncoding.CodePage);
            buffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(reader, contentType, fileSize, filePath);
        }
    }

    characterSubstitutionsOccurred = fallbackDetector.FallbackOccurred;

    // NOTE(review): this is guarded by _DEBUG rather than the conventional C# DEBUG symbol;
    // presumably the project defines _DEBUG itself -- confirm.
#if _DEBUG
    TextUtilities.TagBuffer(buffer, filePath);
#endif

    // The document records the caller's original encoding (not the clone) and marks it as explicitly chosen.
    TextDocument textDocument = new TextDocument(buffer, filePath, lastModified, this, encoding, explicitEncoding: true);
    RaiseTextDocumentCreated(textDocument);

    return textDocument;
}
/// <summary>
/// Re-reads the document's file into the text buffer. Unless the encoding was explicitly chosen, the
/// encoding is re-determined: first via the encoding detectors, then (optionally) by a trial UTF-8 read,
/// and finally by falling back to the system default encoding.
/// </summary>
/// <param name="options">Edit options passed through to the buffer reload.</param>
/// <returns>
/// <see cref="ReloadResult.Aborted"/> when the pre-reload snapshot's version has no successor afterwards;
/// otherwise <see cref="ReloadResult.SucceededWithCharacterSubstitutions"/> if the decoder fallback was
/// observed during the read, or <see cref="ReloadResult.Succeeded"/> if it was not.
/// </returns>
/// <exception cref="ObjectDisposedException">The document's disposed flag is set.</exception>
/// <exception cref="InvalidOperationException">
/// Called while the dirty-state-changed or file-action-changed event is being raised.
/// </exception>
public ReloadResult Reload(EditOptions options)
{
    if (_isDisposed)
    {
        throw new ObjectDisposedException(nameof(ITextDocument));
    }

    bool raisingAnEvent = _raisingDirtyStateChangedEvent || _raisingFileActionChangedEvent;
    if (raisingAnEvent)
    {
        throw new InvalidOperationException();
    }

    Encoding reloadEncoding;
    var snapshotBeforeReload = _textBuffer.CurrentSnapshot;
    bool substitutionsSeen = false;

    try
    {
        _reloadingFile = true;

        // Open the file; this also refreshes the last-modified timestamp field.
        long length;
        using (var fileStream = TextDocumentFactoryService.OpenFile(_filePath, out _lastModifiedTimeUtc, out length))
        {
            var matchingDetectors = ExtensionSelector.SelectMatchingExtensions(_textDocumentFactoryService.OrderedEncodingDetectors, _textBuffer.ContentType);

            if (!_explicitEncoding)
            {
                reloadEncoding = EncodedStreamReader.DetectEncoding(fileStream, matchingDetectors, _textDocumentFactoryService.GuardedOperations);
            }
            else
            {
                // An encoding the user picked explicitly is always respected.
                reloadEncoding = this.Encoding;
            }

            bool tryUtf8 = reloadEncoding == null && _attemptUtf8Detection;
            if (tryUtf8)
            {
                try
                {
                    var utf8Probe = new ExtendedCharacterDetector();
                    ReloadBufferFromStream(fileStream, length, options, utf8Probe);

                    // Extended characters decoded: the content is valid UTF-8 that is more than plain ASCII.
                    // No extended characters: the content looks like plain ASCII. We still don't pick ASCII,
                    // because a user whose system encoding is not English US may open a code file that merely
                    // happens to be ASCII-only today; use their system encoding instead.
                    reloadEncoding = utf8Probe.DecodedExtendedCharacters
                        ? new UTF8Encoding(encoderShouldEmitUTF8Identifier: false)
                        : Encoding.Default;
                }
                catch (DecoderFallbackException)
                {
                    // The content is not valid UTF-8 (e.g. an extended character such as the euro sign
                    // in an ANSI file). Rewind so the load below can retry with the system default code page.
                    Debug.Assert(reloadEncoding == null);
                    Debug.Assert(snapshotBeforeReload.Version.Next == null);
                    fileStream.Position = 0;
                }
            }

            // Last resort: the system's default encoding.
            reloadEncoding = reloadEncoding ?? Encoding.Default;

            // No successor version on the original snapshot means the buffer has not been reloaded yet.
            bool notYetReloaded = snapshotBeforeReload.Version.Next == null;
            if (notYetReloaded)
            {
                // Wrap the encoding's decoder fallback so we can tell whether substitutions occurred
                // while the stream was being read.
                var substitutionWatcher = new FallbackDetector(reloadEncoding.DecoderFallback);
                var watchedEncoding = (Encoding)reloadEncoding.Clone();
                watchedEncoding.DecoderFallback = substitutionWatcher;

                Debug.Assert(fileStream.Position == 0);
                ReloadBufferFromStream(fileStream, length, options, watchedEncoding);

                substitutionsSeen = substitutionWatcher.FallbackOccurred;
            }
        }
    }
    finally
    {
        _reloadingFile = false;
    }

    // A reload always produces a new snapshot, even when the old and new contents are identical (the
    // difference is then just an empty change set), so a missing successor version means the reload
    // did not go through.
    if (snapshotBeforeReload.Version.Next == null)
    {
        return ReloadResult.Aborted;
    }

    // The "clean" reiterated version is that of the version immediately following the pre-reload
    // snapshot: the buffer state right after loading, before any edits made by text-changed handlers.
    _cleanReiteratedVersion = snapshotBeforeReload.Version.Next.ReiteratedVersionNumber;

    // TODO: the following event really should be queued up through the buffer group so that it comes before
    // the text changed event (and any subsequent text changed event invoked from an event handler)
    RaiseFileActionChangedEvent(_lastModifiedTimeUtc, FileActionTypes.ContentLoadedFromDisk, _filePath);

    this.Encoding = reloadEncoding;

    if (substitutionsSeen)
    {
        return ReloadResult.SucceededWithCharacterSubstitutions;
    }

    return ReloadResult.Succeeded;
}
/// <summary>
/// Creates a text document for the file at <paramref name="filePath"/>, working out the encoding itself:
/// first via byte order mark / encoding detectors, then (optionally) by a trial UTF-8 read, and finally by
/// falling back to the default encoding.
/// </summary>
/// <param name="filePath">Path of the file to open and read.</param>
/// <param name="contentType">
/// Content type used to select encoding detectors and passed to the buffer factory.
/// </param>
/// <param name="attemptUtf8Detection">
/// When true and no encoding was detected, the file is first read through an
/// <c>ExtendedCharacterDetector</c> to see whether it decodes as UTF-8.
/// </param>
/// <param name="characterSubstitutionsOccurred">
/// False when the trial UTF-8 read produced the buffer; otherwise the fallback detector's
/// <c>FallbackOccurred</c> value from the final read.
/// </param>
/// <returns>The created document, returned after <c>RaiseTextDocumentCreated</c> has been called for it.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="filePath"/> or <paramref name="contentType"/> is null.
/// </exception>
public ITextDocument CreateAndLoadTextDocument(string filePath, IContentType contentType, bool attemptUtf8Detection, out bool characterSubstitutionsOccurred)
{
    if (filePath == null)
    {
        throw new ArgumentNullException(nameof(filePath));
    }

    if (contentType == null)
    {
        throw new ArgumentNullException(nameof(contentType));
    }

    characterSubstitutionsOccurred = false;

    Encoding resolvedEncoding = null;
    ITextBuffer loadedBuffer = null;
    DateTime lastWriteTime;
    long length;

    // Pick the detectors that match the content type; none of them is instantiated here.
    var candidateDetectors = ExtensionSelector.SelectMatchingExtensions(OrderedEncodingDetectors, contentType);

    using (Stream input = OpenFile(filePath, out lastWriteTime, out length))
    {
        // Step 1: look for a byte order mark and let the encoding detectors suggest an encoding.
        resolvedEncoding = EncodedStreamReader.DetectEncoding(input, candidateDetectors, GuardedOperations);

        // Step 2: nothing detected -- tentatively read the file as UTF-8.
        if (resolvedEncoding == null && attemptUtf8Detection)
        {
            try
            {
                var utf8Probe = new ExtendedCharacterDetector();
                using (StreamReader probeReader = new EncodedStreamReader.NonStreamClosingStreamReader(input, utf8Probe, false))
                {
                    loadedBuffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(probeReader, contentType, length, filePath);
                    characterSubstitutionsOccurred = false;
                }

                if (!utf8Probe.DecodedExtendedCharacters)
                {
                    // Valid UTF-8 without extended characters, i.e. valid ASCII. We deliberately do not
                    // record ASCII: a user with a non-ENU system encoding may open a code file whose
                    // contents merely happen to be ASCII-only.
                    resolvedEncoding = DefaultEncoding;
                }
                else
                {
                    // Valid UTF-8 containing bytes beyond plain ASCII.
                    resolvedEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
                }
            }
            catch (DecoderFallbackException)
            {
                // The content is not valid UTF-8. Rewind the stream and fall through to the default
                // code page load below.
                Debug.Assert(loadedBuffer == null);
                loadedBuffer = null;
                input.Position = 0;
            }
        }

        Debug.Assert(loadedBuffer == null || resolvedEncoding != null);

        // Step 3: if nothing else worked, use the default encoding.
        resolvedEncoding = resolvedEncoding ?? DefaultEncoding;

        if (loadedBuffer == null)
        {
            // Read through a clone whose decoder fallback is wrapped, so substitutions can be observed.
            var substitutionWatcher = new FallbackDetector(resolvedEncoding.DecoderFallback);
            var watchedEncoding = (Encoding)resolvedEncoding.Clone();
            watchedEncoding.DecoderFallback = substitutionWatcher;

            Debug.Assert(input.Position == 0);

            using (StreamReader finalReader = new EncodedStreamReader.NonStreamClosingStreamReader(input, watchedEncoding, detectEncodingFromByteOrderMarks: false))
            {
                Debug.Assert(resolvedEncoding.CodePage == finalReader.CurrentEncoding.CodePage);
                loadedBuffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(finalReader, contentType, length, filePath);
            }

            characterSubstitutionsOccurred = substitutionWatcher.FallbackOccurred;
        }
    }

    TextDocument createdDocument = new TextDocument(loadedBuffer, filePath, lastWriteTime, this, resolvedEncoding, attemptUtf8Detection: attemptUtf8Detection);
    RaiseTextDocumentCreated(createdDocument);

    return createdDocument;
}
/// <summary>
/// Re-reads the document's file into the text buffer. The encoding indicated by a byte order mark is used
/// when one is found; otherwise the document's current encoding is used.
/// </summary>
/// <param name="options">Edit options passed to the buffer's content reload (or to the edit, for non-concrete buffers).</param>
/// <returns>
/// <see cref="ReloadResult.Aborted"/> when the pre-reload snapshot's version has no successor afterwards;
/// otherwise <see cref="ReloadResult.SucceededWithCharacterSubstitutions"/> if the decoder fallback was
/// observed during the read, or <see cref="ReloadResult.Succeeded"/> if it was not.
/// </returns>
/// <exception cref="ObjectDisposedException">The document's disposed flag is set.</exception>
/// <exception cref="InvalidOperationException">
/// Called while the dirty-state-changed or file-action-changed event is being raised.
/// </exception>
public ReloadResult Reload(EditOptions options)
{
    if (_isDisposed)
    {
        throw new ObjectDisposedException(nameof(ITextDocument));
    }

    if (_raisingDirtyStateChangedEvent || _raisingFileActionChangedEvent)
    {
        throw new InvalidOperationException();
    }

    var beforeSnapshot = _textBuffer.CurrentSnapshot;
    Encoding newEncoding = null;
    FallbackDetector fallbackDetector;

    try
    {
        _reloadingFile = true;

        // Load the file and read the contents to the text buffer
        long fileSize;
        using (var stream = TextDocumentFactoryService.OpenFileGuts(_filePath, out _lastModifiedTimeUtc, out fileSize))
        {
            // We want to use the encoding indicated by a BoM if one is present because
            // VS9's editor did so. We can't let the StreamReader below detect
            // the byte order marks because we still want to be able to detect the
            // fallback condition.
            bool unused;
            newEncoding = EncodedStreamReader.CheckForBoM(stream, isStreamEmpty: out unused);
            Debug.Assert(newEncoding == null || newEncoding.GetPreamble().Length > 0);

            // TODO: Consider using the encoder detector extensions as well.

            if (newEncoding == null)
            {
                // No byte order mark: keep decoding with the document's current encoding.
                newEncoding = this.Encoding;
            }

            // Read through a clone whose decoder fallback is wrapped, so substitutions can be observed.
            fallbackDetector = new FallbackDetector(newEncoding.DecoderFallback);
            var modifiedEncoding = (Encoding)newEncoding.Clone();
            modifiedEncoding.DecoderFallback = fallbackDetector;

            using (var streamReader = new StreamReader(stream, modifiedEncoding, detectEncodingFromByteOrderMarks: false))
            {
                TextBuffer concreteBuffer = _textBuffer as TextBuffer;
                if (concreteBuffer != null)
                {
                    // NOTE(review): fileSize is a long narrowed to int for the loaders; a file larger
                    // than int.MaxValue bytes would not survive this cast -- confirm an upstream size limit.
                    ITextStorageLoader loader;
                    if (fileSize < TextModelOptions.CompressedStorageFileSizeThreshold)
                    {
                        loader = new SimpleTextStorageLoader(streamReader, (int)fileSize);
                    }
                    else
                    {
                        loader = new CompressedTextStorageLoader(streamReader, (int)fileSize, _filePath);
                    }

                    StringRebuilder newContent = SimpleStringRebuilder.Create(loader);

                    if (!loader.HasConsistentLineEndings)
                    {
                        // leave a sign that line endings are inconsistent. This is rather nasty but for now
                        // we don't want to pollute the API with this factoid.
                        concreteBuffer.Properties["InconsistentLineEndings"] = true;
                    }
                    else
                    {
                        // this covers a really obscure case where on initial load the file had inconsistent line
                        // endings, but the UI settings were such that it was ignored, and since then the file has
                        // acquired consistent line endings and the UI settings have also changed.
                        concreteBuffer.Properties.RemoveProperty("InconsistentLineEndings");
                    }

                    // leave a similar sign about the longest line in the buffer.
                    concreteBuffer.Properties["LongestLineLength"] = loader.LongestLineLength;

                    concreteBuffer.ReloadContent(newContent, options, editTag: this);
                }
                else
                {
                    // we may hit this path if somebody mocks the text buffer in a test.
                    using (var edit = _textBuffer.CreateEdit(options, null, editTag: this))
                    {
                        // Replace the whole snapshot with the file's text; cancel the edit if the replace is refused.
                        if (edit.Replace(new Span(0, edit.Snapshot.Length), streamReader.ReadToEnd()))
                        {
                            edit.Apply();
                        }
                        else
                        {
                            edit.Cancel();
                        }
                    }
                }

                Debug.Assert(streamReader.CurrentEncoding.CodePage == newEncoding.CodePage);
                Debug.Assert(streamReader.CurrentEncoding.GetPreamble().Length == newEncoding.GetPreamble().Length);
            }
        }
    }
    finally
    {
        _reloadingFile = false;
    }

    // The snapshot on a reload will change even if the contents of the before & after files are identical
    // (differences will simply find an empty set of changes), so this test is a measure of whether or not
    // the reload succeeded.
    if (beforeSnapshot.Version.Next != null)
    {
        // Update status
        // set the "clean" reiterated version number to the reiterated version number of the version immediately
        // after the before snapshot (which is the state of the buffer after loading the document but before any
        // subsequent edits made in the text buffer changed events).
        _cleanReiteratedVersion = beforeSnapshot.Version.Next.ReiteratedVersionNumber;

        // TODO: the following event really should be queued up through the buffer group so that it comes before
        // the text changed event (and any subsequent text changed event invoked from an event handler)
        RaiseFileActionChangedEvent(_lastModifiedTimeUtc, FileActionTypes.ContentLoadedFromDisk, _filePath);

        this.Encoding = newEncoding;

        return fallbackDetector.FallbackOccurred ? ReloadResult.SucceededWithCharacterSubstitutions : ReloadResult.Succeeded;
    }
    else
    {
        return ReloadResult.Aborted;
    }
}