/// <summary>
/// Creates a UTF-8 gap-buffer backed text storage from the contents of a stream.
/// </summary>
/// <param name="stream">Stream handed to the <c>Utf8SplayGapBuffer</c> constructor.</param>
/// <param name="encoding">Must be a <c>UTF8Encoding</c> or an <c>ANSIEncoding</c>.</param>
/// <param name="lineEndingInfo">Filled in by the <c>Utf8SplayGapBuffer</c> constructor.</param>
/// <returns>A new <c>Utf8GapStorage</c> wrapping the freshly built buffer.</returns>
/// <exception cref="ArgumentException">The encoding is neither of the two supported kinds.</exception>
public override ITextStorage FromStream(Stream stream, Encoding encoding, out LineEndingInfo lineEndingInfo)
 {
     bool isUtf8 = encoding is UTF8Encoding;
     bool isAnsi = encoding is ANSIEncoding;
     if (!(isUtf8 || isAnsi))
     {
         // Unsupported encoding: flag it in debug builds, then reject it.
         Debug.Assert(false);
         throw new ArgumentException();
     }

     // The detectBom argument is true only for UTF-8 input.
     Utf8SplayGapBuffer gapBuffer = new Utf8SplayGapBuffer(
         stream,
         isUtf8/*detectBom*/,
         encoding,
         out lineEndingInfo);

     return new Utf8GapStorage(this, gapBuffer);
 }
 /// <summary>
 /// Builds a new buffer from a run of lines taken out of an existing buffer by streaming the
 /// corresponding slice of <c>source.vector</c> into the stream-based constructor.
 /// </summary>
 /// <param name="source">Buffer whose underlying vector supplies the data.</param>
 /// <param name="startLine">First line of the run to copy.</param>
 /// <param name="countLines">Number of lines in the run.</param>
 /// <param name="encoding">Forwarded unchanged to the stream-based constructor.</param>
 /// <param name="lineEndingInfo">Filled in by the stream-based constructor.</param>
 public Utf8SplayGapBuffer(Utf8SplayGapBuffer source, int startLine, int countLines, Encoding encoding, out LineEndingInfo lineEndingInfo)
     // The slice begins at the start index of startLine; the third argument is the distance from
     // there to the start index of line (startLine + countLines).
     // NOTE(review): presumably VectorReadStream takes (vector, offset, length) - confirm.
     : this(new VectorReadStream(
             source.vector,
             source.GetStartIndexOfLineRelative(startLine, source.prefixLength),
             source.GetStartIndexOfLineRelative(startLine + countLines, source.prefixLength)
                 - source.GetStartIndexOfLineRelative(startLine, source.prefixLength)),
         false/*detectBom*/,
         encoding,
         out lineEndingInfo)
 {
 }
        /// <summary>
        /// Loads the entire contents of <paramref name="stream"/> into the buffer, brackets the data
        /// with sentinel line separators, then scans it once to count lines, tally line-ending
        /// styles, and populate the line skip map.
        /// </summary>
        /// <param name="stream">Source of raw bytes; read until <c>Read</c> returns 0.</param>
        /// <param name="detectBom">When true, a leading UTF-8 byte order mark (EF BB BF) is recognized
        /// and left in front of the leading sentinel; when false those bytes are ordinary text.</param>
        /// <param name="encoding">Encoding of the incoming bytes. Null or a <c>UTF8Encoding</c> leaves
        /// the bytes as they are; any other encoding causes each line's text to be transcoded to
        /// UTF-8 in place.</param>
        /// <param name="lineEndingInfo">Receives the number of CR LF, CR, and LF line endings found.</param>
        public Utf8SplayGapBuffer(
            Stream stream,
            bool detectBom,
            Encoding encoding,
            out LineEndingInfo lineEndingInfo)
        {
            // Pull the whole stream into the vector, one block-sized chunk at a time.
            byte[] buffer = new byte[vector.MaxBlockSize];
            while (true)
            {
                int read = stream.Read(buffer, 0, buffer.Length);
                if (read == 0)
                {
                    break;
                }
                vector.InsertRange(vector.Count, buffer, 0, read);
            }

            // Honor a leading EF BB BF as a BOM only when the caller asked for detection.
            if (detectBom
                && (vector.Count >= 3) && ((vector[0] == 0xEF) && (vector[1] == 0xBB) && (vector[2] == 0xBF)))
            {
                bomLength = 3;
            }

            lineEndingInfo = new LineEndingInfo();

            // invariant: require separators at ends
            Debug.Assert(WindowsLF.Length == 2);
            prefixLength = (byte)(bomLength + 2);
            vector.InsertRange(bomLength, WindowsLF);
            suffixLength = 2;
            vector.InsertRange(vector.Count, WindowsLF);

            totalLines = 0;
            currentLine = 0;
            currentOffset = prefixLength;

            lineSkipMap.Reset(prefixLength, suffixLength);

            bool ignoreEncoding = (encoding == null) || (encoding is UTF8Encoding);

            int lineEndingCount = 0;
            int endOfData = vector.Count - suffixLength/*avoid our artificial addition*/;
            //
            // Lines are reported to the skip map in batches; these track the pending batch.
            int currentSkipStartLine = 0;
            int currentSkipNumLines = 0;
            int currentSkipCharOffset = prefixLength;
            int currentSkipCharLength = 0;
            //
            while (currentOffset < endOfData)
            {
                int textEnd = vector.IndexOfAny(LineEndingChars, currentOffset, endOfData - currentOffset);
                if (textEnd < 0)
                {
                    textEnd = endOfData;
                }
                int textLength = textEnd - currentOffset;

                // TODO: can extend to also handle line terminators
                if (!ignoreEncoding && (textLength != 0))
                {
                    // Copy the line's raw bytes out of the vector so they can be decoded. Only the
                    // indexer is used here; a bulk copy would be preferable if the vector offers one.
                    byte[] bytes = new byte[textLength];
                    for (int i = 0; i < textLength; i++)
                    {
                        bytes[i] = vector[currentOffset + i];
                    }

                    string s = encoding.GetString(bytes);
                    int c = Encoding.UTF8.GetByteCount(s);
                    // NOTE(review): equal length is taken to mean "already valid as UTF-8", which
                    // holds for single-byte source encodings - confirm if others become supported.
                    if (c != bytes.Length)
                    {
                        byte[] bytes2 = Encoding.UTF8.GetBytes(s);
                        vector.ReplaceRange(currentOffset, bytes.Length, bytes2);

                        // The replacement shifted everything after this line; keep the scan
                        // positions in step so the terminator is found where it now lives. The
                        // skip-map length below is derived from these positions, so it needs no
                        // separate adjustment.
                        int delta = bytes2.Length - bytes.Length;
                        textEnd += delta;
                        endOfData += delta;
                    }
                }

                Debug.Assert(IsAtLineEnding(textEnd));
                int nextStart = textEnd;
                if (nextStart < endOfData)
                {
                    lineEndingCount++;
                    if (vector[nextStart] == (byte)'\r')
                    {
                        // Reading one past the '\r' is safe: the trailing sentinel follows the data.
                        if (vector[nextStart + 1] == (byte)'\n')
                        {
                            nextStart++;
                            lineEndingInfo.windowsLFCount++;
                        }
                        else
                        {
                            lineEndingInfo.macintoshLFCount++;
                        }
                    }
                    else
                    {
                        Debug.Assert(vector[nextStart] == (byte)'\n');
                        lineEndingInfo.unixLFCount++;
                    }
                    nextStart++;
                }

                currentSkipNumLines++;
                currentSkipCharLength += nextStart - currentOffset;
                if (currentSkipNumLines > LineSkipMap.Sparseness)
                {
                    lineSkipMap.BulkLinesInserted(currentSkipStartLine, currentSkipNumLines, currentSkipCharOffset, currentSkipCharLength);
                    currentSkipStartLine += currentSkipNumLines;
                    currentSkipNumLines = 0;
                    currentSkipCharOffset += currentSkipCharLength;
                    currentSkipCharLength = 0;
                }

                currentLine++;
                totalLines++;
                currentOffset = nextStart;
            }
            // Flush whatever is left in the pending batch.
            if (currentSkipNumLines != 0)
            {
                lineSkipMap.BulkLinesInserted(currentSkipStartLine, currentSkipNumLines, currentSkipCharOffset, currentSkipCharLength);
            }

            if (lineEndingCount == totalLines)
            {
                // file ends with blank line
                totalLines++;
            }
            else
            {
                // last line was unterminated - back it out
                lineSkipMap.LineRemoved(currentLine, -suffixLength);
                lineSkipMap.LineLengthChanged(currentLine, suffixLength);

                currentOffset += suffixLength;
            }

            if (EnableValidate)
            {
                Validate();
            }
        }
Example #4
0
            /// <summary>
            /// Reads a stream, decodes it with <paramref name="encoding"/>, splits the text into lines
            /// at CR LF, CR, or LF, and stores each line in a new text storage.
            /// </summary>
            /// <param name="stream">Source of encoded bytes; read until <c>Read</c> returns 0.</param>
            /// <param name="encoding">Encoding whose decoder turns the bytes into characters.</param>
            /// <param name="lineEndingInfo">Receives the number of each line-ending style found.</param>
            /// <returns>The populated storage, with its <c>modified</c> flag cleared.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="stream"/> or
            /// <paramref name="encoding"/> is null.</exception>
            public virtual ITextStorage FromStream(
                Stream stream,
                Encoding encoding,
                out LineEndingInfo lineEndingInfo)
            {
                if (stream == null)
                {
                    throw new ArgumentNullException("stream");
                }
                if (encoding == null)
                {
                    throw new ArgumentNullException("encoding");
                }

                lineEndingInfo = new LineEndingInfo();

                TextStorage text = NewStorage();

                Decoder decoder = encoding.GetDecoder();

                byte[] bytes = new byte[4096]; // undecoded leftovers are moved to the front between reads
                int usedBytes = 0;
                char[] chars = new char[4096];
                int usedChars = 0;
                char[] currentLine = new char[128];
                int currentLineIndex = 0;
                int index = 0;
                while (true)
                {
                    int readBytes = stream.Read(bytes, usedBytes, bytes.Length - usedBytes);
                    usedBytes += readBytes;
                    bool endOfStream = readBytes == 0;

                    // if there is an incomplete multi-byte sequence at the end of the file, Convert()
                    // will eat it without reporting.
                    int usedBytes1, usedChars1;
                    bool completed;
                    decoder.Convert(bytes, 0, usedBytes, chars, usedChars, chars.Length - usedChars, endOfStream/*flush*/, out usedBytes1, out usedChars1, out completed);
                    Array.Copy(bytes, usedBytes1, bytes, 0, usedBytes - usedBytes1);
                    usedBytes -= usedBytes1;
                    usedChars += usedChars1;

                    int i;
                    for (i = 0; i < usedChars; i++)
                    {
                        bool lineBreak = false;
                        if (chars[i] == '\r')
                        {
                            if (!(i + 1 < usedChars))
                            {
                                // '\r' is the last decoded character, so whether it starts a CR LF
                                // pair is not yet known. Only at end of stream is it certainly a
                                // bare CR; otherwise hold it back until more input has arrived.
                                // (Deciding on "i == 0" instead would split a CR LF pair whenever a
                                // short read delivered the '\r' by itself.)
                                if (endOfStream)
                                {
                                    lineBreak = true;
                                    lineEndingInfo.macintoshLFCount++;
                                }
                                else
                                {
                                    break;
                                }
                            }
                            else
                            {
                                lineBreak = true;
                                if (chars[i + 1] == '\n')
                                {
                                    i++;
                                    lineEndingInfo.windowsLFCount++;
                                }
                                else
                                {
                                    lineEndingInfo.macintoshLFCount++;
                                }
                            }
                        }
                        else if (chars[i] == '\n')
                        {
                            lineBreak = true;
                            lineEndingInfo.unixLFCount++;
                        }

                        if (lineBreak)
                        {
                            text.Insert(index, Encode(currentLine, 0, currentLineIndex));
                            currentLineIndex = 0;
                            index++;
                        }
                        else
                        {
                            // Grow the line accumulator by doubling when it is full.
                            if (currentLine.Length == currentLineIndex)
                            {
                                Array.Resize(ref currentLine, currentLine.Length * 2);
                            }
                            currentLine[currentLineIndex++] = chars[i];
                        }
                    }
                    // Keep any held-back characters at the front for the next pass.
                    Array.Copy(chars, i, chars, 0, usedChars - i);
                    usedChars -= i;

                    if ((usedBytes == 0) && (readBytes == 0) && (usedChars == 0))
                    {
                        break;
                    }
                }
                // Whatever remains in the accumulator becomes the final line.
                Debug.Assert(index == text.GetLineCount() - 1);
                text.SetLine(index, Encode(currentLine, 0, currentLineIndex));

                text.modified = false;
                return text;
            }