public ITextDocument CreateAndLoadTextDocument(string filePath, IContentType contentType, Encoding encoding, out bool characterSubstitutionsOccurred)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException("filePath");
            }

            if (contentType == null)
            {
                throw new ArgumentNullException("contentType");
            }

            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            var fallbackDetector = new FallbackDetector(encoding.DecoderFallback);
            var modifiedEncoding = (Encoding)encoding.Clone();

            modifiedEncoding.DecoderFallback = fallbackDetector;

            ITextBuffer buffer;
            DateTime    lastModified;
            long        fileSize;

            using (Stream stream = OpenFile(filePath, out lastModified, out fileSize))
            {
                // Caller knows best, so don't use byte order marks.
                using (StreamReader reader = new StreamReader(stream, modifiedEncoding, detectEncodingFromByteOrderMarks: false))
                {
                    System.Diagnostics.Debug.Assert(encoding.CodePage == reader.CurrentEncoding.CodePage);
                    buffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(reader, contentType, fileSize, filePath);
                }
            }

            characterSubstitutionsOccurred = fallbackDetector.FallbackOccurred;

#if _DEBUG
            TextUtilities.TagBuffer(buffer, filePath);
#endif
            TextDocument textDocument = new TextDocument(buffer, filePath, lastModified, this, encoding, explicitEncoding: true);

            RaiseTextDocumentCreated(textDocument);

            return(textDocument);
        }
Exemple #2
0
        public ReloadResult Reload(EditOptions options)
        {
            if (_isDisposed)
            {
                throw new ObjectDisposedException(nameof(ITextDocument));
            }
            if (_raisingDirtyStateChangedEvent || _raisingFileActionChangedEvent)
            {
                throw new InvalidOperationException();
            }

            Encoding newEncoding;
            var      beforeSnapshot = _textBuffer.CurrentSnapshot;
            bool     characterSubstitutionsOccurred = false;

            try {
                _reloadingFile = true;

                // Load the file and read the contents to the text buffer
                long fileSize;

                using (var stream = TextDocumentFactoryService.OpenFile(_filePath, out _lastModifiedTimeUtc, out fileSize)) {
                    var detectors = ExtensionSelector.SelectMatchingExtensions(_textDocumentFactoryService.OrderedEncodingDetectors, _textBuffer.ContentType);

                    if (_explicitEncoding)
                    {
                        // If the user explicitly chose their encoding, we want to respect it.
                        newEncoding = this.Encoding;
                    }
                    else
                    {
                        newEncoding = EncodedStreamReader.DetectEncoding(stream, detectors, _textDocumentFactoryService.GuardedOperations);
                    }

                    if (newEncoding == null && _attemptUtf8Detection)
                    {
                        try {
                            var detectorEncoding = new ExtendedCharacterDetector();

                            ReloadBufferFromStream(stream, fileSize, options, detectorEncoding);

                            if (detectorEncoding.DecodedExtendedCharacters)
                            {
                                // Valid UTF-8 but has bytes that are not merely ASCII.
                                newEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
                            }
                            else
                            {
                                // Valid UTF8 but no extended characters, so it looks like valid ASCII.
                                // However, we don't use ASCII here because of the following scenario:
                                // The user with a non-English US system encoding opens a code file that happens to contain ASCII-only contents
                                // Therefore we'll just use their system encoding.
                                newEncoding = Encoding.Default;
                            }
                        } catch (DecoderFallbackException) {
                            // Not valid UTF-8.
                            // Proceed to the next if block to try the system's default codepage.
                            // For example, this occurs when you have extended characters like € in a UTF-8 file or ANSI file.
                            // We reset the stream so we can continue loading with the default system encoding.
                            Debug.Assert(newEncoding == null);
                            Debug.Assert(beforeSnapshot.Version.Next == null);
                            stream.Position = 0;
                        }
                    }

                    // If all else didn't work, use system's default encoding.
                    if (newEncoding == null)
                    {
                        newEncoding = Encoding.Default;
                    }

                    //If there is no "Next" version of the original snapshot, we have not successfully reloaded the document
                    if (beforeSnapshot.Version.Next == null)
                    {
                        //We use this fall back detector to observe whether or not character substitutions
                        //occur while we're reading the stream
                        var fallbackDetector = new FallbackDetector(newEncoding.DecoderFallback);
                        var modifiedEncoding = (Encoding)newEncoding.Clone();
                        modifiedEncoding.DecoderFallback = fallbackDetector;

                        Debug.Assert(stream.Position == 0);
                        ReloadBufferFromStream(stream, fileSize, options, modifiedEncoding);

                        if (fallbackDetector.FallbackOccurred)
                        {
                            characterSubstitutionsOccurred = fallbackDetector.FallbackOccurred;
                        }
                    }
                }
            } finally {
                _reloadingFile = false;
            }

            //The snapshot on a reload will change even if the contents of the before & after files are identical (differences will simply find an
            //empty set of changes) so this test is a measure of whether of not the reload succeeded.
            if (beforeSnapshot.Version.Next != null)
            {
                // Update status
                // set the "clean" reiterated version number to the reiterated version number of the version immediately
                // after the before snapshot (which is the state of the buffer after loading the document but before any
                // subsequent edits made in the text buffer changed events).
                _cleanReiteratedVersion = beforeSnapshot.Version.Next.ReiteratedVersionNumber;

                // TODO: the following event really should be queued up through the buffer group so that it comes before
                // the text changed event (and any subsequent text changed event invoked from an event handler)
                RaiseFileActionChangedEvent(_lastModifiedTimeUtc, FileActionTypes.ContentLoadedFromDisk, _filePath);
                this.Encoding = newEncoding;
                return(characterSubstitutionsOccurred ? ReloadResult.SucceededWithCharacterSubstitutions : ReloadResult.Succeeded);
            }
            else
            {
                return(ReloadResult.Aborted);
            }
        }
        public ITextDocument CreateAndLoadTextDocument(string filePath, IContentType contentType, bool attemptUtf8Detection, out bool characterSubstitutionsOccurred)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException(nameof(filePath));
            }

            if (contentType == null)
            {
                throw new ArgumentNullException(nameof(contentType));
            }

            characterSubstitutionsOccurred = false;

            Encoding    chosenEncoding = null;
            ITextBuffer buffer         = null;
            DateTime    lastModified;
            long        fileSize;

            // select matching detectors without instantiating any
            var detectors = ExtensionSelector.SelectMatchingExtensions(OrderedEncodingDetectors, contentType);

            using (Stream stream = OpenFile(filePath, out lastModified, out fileSize))
            {
                // First, look for a byte order marker and let the encoding detecters
                // suggest encodings.
                chosenEncoding = EncodedStreamReader.DetectEncoding(stream, detectors, GuardedOperations);

                // If that didn't produce a result, tentatively try to open as UTF 8.
                if (chosenEncoding == null && attemptUtf8Detection)
                {
                    try
                    {
                        var detectorEncoding = new ExtendedCharacterDetector();

                        using (StreamReader reader = new EncodedStreamReader.NonStreamClosingStreamReader(stream, detectorEncoding, false))
                        {
                            buffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(reader, contentType, fileSize, filePath);
                            characterSubstitutionsOccurred = false;
                        }

                        if (detectorEncoding.DecodedExtendedCharacters)
                        {
                            // Valid UTF-8 but has bytes that are not merely ASCII.
                            chosenEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
                        }
                        else
                        {
                            // Valid UTF8 but no extended characters, so it's valid ASCII.
                            // We don't use ASCII here because of the following scenario:
                            // The user with a non-ENU system encoding opens a code file with ASCII-only contents
                            chosenEncoding = DefaultEncoding;
                        }
                    }
                    catch (DecoderFallbackException)
                    {
                        // Not valid UTF-8.
                        // Proceed to the next if block to try the system's default codepage.
                        Debug.Assert(buffer == null);
                        buffer          = null;
                        stream.Position = 0;
                    }
                }

                Debug.Assert(buffer == null || chosenEncoding != null);

                // If all else didn't work, use system's default encoding.
                if (chosenEncoding == null)
                {
                    chosenEncoding = DefaultEncoding;
                }

                if (buffer == null)
                {
                    var fallbackDetector = new FallbackDetector(chosenEncoding.DecoderFallback);
                    var modifiedEncoding = (Encoding)chosenEncoding.Clone();
                    modifiedEncoding.DecoderFallback = fallbackDetector;

                    Debug.Assert(stream.Position == 0);

                    using (StreamReader reader = new EncodedStreamReader.NonStreamClosingStreamReader(stream, modifiedEncoding, detectEncodingFromByteOrderMarks: false))
                    {
                        Debug.Assert(chosenEncoding.CodePage == reader.CurrentEncoding.CodePage);
                        buffer = ((ITextBufferFactoryService2)BufferFactoryService).CreateTextBuffer(reader, contentType, fileSize, filePath);
                    }

                    characterSubstitutionsOccurred = fallbackDetector.FallbackOccurred;
                }
            }

            TextDocument textDocument = new TextDocument(buffer, filePath, lastModified, this, chosenEncoding, attemptUtf8Detection: attemptUtf8Detection);

            RaiseTextDocumentCreated(textDocument);

            return(textDocument);
        }
Exemple #4
0
        public ReloadResult Reload(EditOptions options)
        {
            if (_isDisposed)
            {
                throw new ObjectDisposedException("ITextDocument");
            }
            if (_raisingDirtyStateChangedEvent || _raisingFileActionChangedEvent)
            {
                throw new InvalidOperationException();
            }

            var              beforeSnapshot = _textBuffer.CurrentSnapshot;
            Encoding         newEncoding    = null;
            FallbackDetector fallbackDetector;

            try
            {
                _reloadingFile = true;

                // Load the file and read the contents to the text buffer

                long fileSize;
                using (var stream = TextDocumentFactoryService.OpenFileGuts(_filePath, out _lastModifiedTimeUtc, out fileSize))
                {
                    // We want to use the encoding indicated by a BoM if one is present because
                    // VS9's editor did so. We can't let the StreamReader below detect
                    // the byte order marks because we still want to be able to detect the
                    // fallback condition.
                    bool unused;
                    newEncoding = EncodedStreamReader.CheckForBoM(stream, isStreamEmpty: out unused);

                    Debug.Assert(newEncoding == null || newEncoding.GetPreamble().Length > 0);

                    // TODO: Consider using the encoder detector extensions as well.

                    if (newEncoding == null)
                    {
                        newEncoding = this.Encoding;
                    }

                    fallbackDetector = new FallbackDetector(newEncoding.DecoderFallback);
                    var modifiedEncoding = (Encoding)newEncoding.Clone();
                    modifiedEncoding.DecoderFallback = fallbackDetector;

                    using (var streamReader = new StreamReader(stream, modifiedEncoding, detectEncodingFromByteOrderMarks: false))
                    {
                        TextBuffer concreteBuffer = _textBuffer as TextBuffer;
                        if (concreteBuffer != null)
                        {
                            ITextStorageLoader loader;
                            if (fileSize < TextModelOptions.CompressedStorageFileSizeThreshold)
                            {
                                loader = new SimpleTextStorageLoader(streamReader, (int)fileSize);
                            }
                            else
                            {
                                loader = new CompressedTextStorageLoader(streamReader, (int)fileSize, _filePath);
                            }
                            StringRebuilder newContent = SimpleStringRebuilder.Create(loader);
                            if (!loader.HasConsistentLineEndings)
                            {
                                // leave a sign that line endings are inconsistent. This is rather nasty but for now
                                // we don't want to pollute the API with this factoid.
                                concreteBuffer.Properties["InconsistentLineEndings"] = true;
                            }
                            else
                            {
                                // this covers a really obscure case where on initial load the file had inconsistent line
                                // endings, but the UI settings were such that it was ignored, and since then the file has
                                // acquired consistent line endings and the UI settings have also changed.
                                concreteBuffer.Properties.RemoveProperty("InconsistentLineEndings");
                            }
                            // leave a similar sign about the longest line in the buffer.
                            concreteBuffer.Properties["LongestLineLength"] = loader.LongestLineLength;

                            concreteBuffer.ReloadContent(newContent, options, editTag: this);
                        }
                        else
                        {
                            // we may hit this path if somebody mocks the text buffer in a test.
                            using (var edit = _textBuffer.CreateEdit(options, null, editTag: this))
                            {
                                if (edit.Replace(new Span(0, edit.Snapshot.Length), streamReader.ReadToEnd()))
                                {
                                    edit.Apply();
                                }
                                else
                                {
                                    edit.Cancel();
                                }
                            }
                        }
                        Debug.Assert(streamReader.CurrentEncoding.CodePage == newEncoding.CodePage);
                        Debug.Assert(streamReader.CurrentEncoding.GetPreamble().Length == newEncoding.GetPreamble().Length);
                    }
                }
            }
            finally
            {
                _reloadingFile = false;
            }

            //The snapshot on a reload will change even if the contents of the before & after files are identical (differences will simply find an
            //empty set of changes) so this test is a measure of whether of not the reload succeeded.
            if (beforeSnapshot.Version.Next != null)
            {
                // Update status
                // set the "clean" reiterated version number to the reiterated version number of the version immediately
                // after the before snapshot (which is the state of the buffer after loading the document but before any
                // subsequent edits made in the text buffer changed events).
                _cleanReiteratedVersion = beforeSnapshot.Version.Next.ReiteratedVersionNumber;

                // TODO: the following event really should be queued up through the buffer group so that it comes before
                // the text changed event (and any subsequent text changed event invoked from an event handler)
                RaiseFileActionChangedEvent(_lastModifiedTimeUtc, FileActionTypes.ContentLoadedFromDisk, _filePath);
                this.Encoding = newEncoding;
                return(fallbackDetector.FallbackOccurred ? ReloadResult.SucceededWithCharacterSubstitutions : ReloadResult.Succeeded);
            }
            else
            {
                return(ReloadResult.Aborted);
            }
        }