/// <summary>
/// Builds a UTF-8 gap-buffer backed text storage from the contents of a stream.
/// </summary>
/// <param name="stream">Stream the text is read from.</param>
/// <param name="encoding">
/// Encoding of the stream contents. Only <c>UTF8Encoding</c> and <c>ANSIEncoding</c> are accepted.
/// </param>
/// <param name="lineEndingInfo">Receives the line-ending statistics produced by the buffer constructor.</param>
/// <returns>A new <c>Utf8GapStorage</c> wrapping a <c>Utf8SplayGapBuffer</c> loaded from the stream.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="encoding"/> is neither a <c>UTF8Encoding</c> nor an <c>ANSIEncoding</c> (this includes null).
/// </exception>
public override ITextStorage FromStream(Stream stream, Encoding encoding, out LineEndingInfo lineEndingInfo)
{
    bool isUtf8 = encoding is UTF8Encoding;
    bool isSupported = isUtf8 || (encoding is ANSIEncoding);
    if (!isSupported)
    {
        // Unsupported encodings are a caller error: trip the debugger in debug builds, then fail.
        Debug.Assert(false);
        throw new ArgumentException();
    }

    // BOM detection is requested only when the input is declared to be UTF-8.
    Utf8SplayGapBuffer gapBuffer = new Utf8SplayGapBuffer(
        stream,
        isUtf8/*detectBom*/,
        encoding,
        out lineEndingInfo);

    return new Utf8GapStorage(this, gapBuffer);
}
/// <summary>
/// Creates a new buffer from a run of lines of an existing buffer by streaming the corresponding
/// byte range of the source vector through the stream-based constructor.
/// </summary>
/// <param name="source">Buffer whose lines are copied.</param>
/// <param name="startLine">Index of the first line to copy.</param>
/// <param name="countLines">Number of lines to copy.</param>
/// <param name="encoding">Passed through unchanged to the stream-based constructor.</param>
/// <param name="lineEndingInfo">Receives the line-ending statistics for the copied range.</param>
public Utf8SplayGapBuffer(Utf8SplayGapBuffer source, int startLine, int countLines, Encoding encoding, out LineEndingInfo lineEndingInfo)
    : this(
        OpenLineRange(source, startLine, countLines),
        false/*detectBom*/, // BOM detection is not requested for a range copied out of another buffer
        encoding,
        out lineEndingInfo)
{
}

// Opens a read stream over the bytes of 'source' spanning from the start of 'startLine' to the
// start of line 'startLine + countLines'. Each boundary is looked up exactly once.
private static Stream OpenLineRange(Utf8SplayGapBuffer source, int startLine, int countLines)
{
    int rangeStart = source.GetStartIndexOfLineRelative(startLine, source.prefixLength);
    int rangeEnd = source.GetStartIndexOfLineRelative(startLine + countLines, source.prefixLength);
    return new VectorReadStream(source.vector, rangeStart, rangeEnd - rangeStart);
}
/// <summary>
/// Loads the entire contents of <paramref name="stream"/> into the buffer, brackets the data with
/// line-separator sentinels, then scans it line by line to build the line skip map and to count
/// the line endings encountered. Text in a non-UTF-8 encoding is transcoded to UTF-8 in place.
/// </summary>
/// <param name="stream">Stream that is read until <c>Read</c> returns 0.</param>
/// <param name="detectBom">
/// When true, a leading EF BB BF sequence is recorded as a 3-byte BOM and excluded from the text.
/// When false, such bytes are treated as ordinary text.
/// </param>
/// <param name="encoding">
/// Encoding of the incoming bytes. Null or a <c>UTF8Encoding</c> means the bytes are used as-is;
/// anything else is decoded per line and re-encoded as UTF-8.
/// </param>
/// <param name="lineEndingInfo">Receives counts of CR LF, lone CR, and lone LF line endings.</param>
/// <remarks>
/// NOTE(review): line terminators are located in the raw bytes before decoding, so transcoding
/// presumably requires an encoding in which CR and LF are the single bytes 0x0D and 0x0A - confirm.
/// NOTE(review): callers supplying bytes that are already UTF-8 should pass null or a UTF8Encoding,
/// otherwise the text is transcoded a second time.
/// </remarks>
public Utf8SplayGapBuffer(
    Stream stream,
    bool detectBom,
    Encoding encoding,
    out LineEndingInfo lineEndingInfo)
{
    // Slurp the whole stream into the vector, one block at a time.
    byte[] buffer = new byte[vector.MaxBlockSize];
    while (true)
    {
        int read = stream.Read(buffer, 0, buffer.Length);
        if (read == 0)
        {
            break;
        }
        vector.InsertRange(vector.Count, buffer, 0, read);
    }

    // Only honour a UTF-8 byte order mark when the caller asked for detection.
    if (detectBom
        && (vector.Count >= 3)
        && ((vector[0] == 0xEF) && (vector[1] == 0xBB) && (vector[2] == 0xBF)))
    {
        bomLength = 3;
    }

    lineEndingInfo = new LineEndingInfo();

    // invariant: require separators at ends
    Debug.Assert(WindowsLF.Length == 2);
    prefixLength = (byte)(bomLength + 2);
    vector.InsertRange(bomLength, WindowsLF);
    suffixLength = 2;
    vector.InsertRange(vector.Count, WindowsLF);

    totalLines = 0;
    currentLine = 0;
    currentOffset = prefixLength;
    lineSkipMap.Reset(prefixLength, suffixLength);

    bool ignoreEncoding = (encoding == null) || (encoding is UTF8Encoding);

    int lineEndingCount = 0;
    int endOfData = vector.Count - suffixLength/*avoid our artificial addition*/;

    // Pending run of lines not yet reported to the skip map.
    int currentSkipStartLine = 0;
    int currentSkipNumLines = 0;
    int currentSkipCharOffset = prefixLength;
    int currentSkipCharLength = 0;

    while (currentOffset < endOfData)
    {
        int textEnd = vector.IndexOfAny(LineEndingChars, currentOffset, endOfData - currentOffset);
        if (textEnd < 0)
        {
            textEnd = endOfData;
        }
        int textLength = textEnd - currentOffset;

        // TODO: can extend to also handle line terminators
        if (!ignoreEncoding)
        {
            // Pull the raw bytes of this line out of the vector so they can be decoded.
            byte[] bytes = new byte[textLength];
            for (int i = 0; i < textLength; i++)
            {
                bytes[i] = vector[currentOffset + i];
            }

            string s = encoding.GetString(bytes);
            int c = Encoding.UTF8.GetByteCount(s);
            if (c != bytes.Length)
            {
                byte[] bytes2 = Encoding.UTF8.GetBytes(s);
                vector.ReplaceRange(currentOffset, bytes.Length, bytes2);

                // The line changed size: everything after it moved by 'delta', so shift the
                // positions computed before the replacement. The skip-map length below is
                // derived from the shifted positions and therefore already includes 'delta'.
                int delta = bytes2.Length - bytes.Length;
                textEnd += delta;
                endOfData += delta;
            }
        }

        Debug.Assert(IsAtLineEnding(textEnd));
        int nextStart = textEnd;
        if (nextStart < endOfData)
        {
            lineEndingCount++;
            if (vector[nextStart] == (byte)'\r')
            {
                // Reading nextStart + 1 is safe even at the end of the data because of the
                // separator appended after it.
                if (vector[nextStart + 1] == (byte)'\n')
                {
                    nextStart++;
                    lineEndingInfo.windowsLFCount++;
                }
                else
                {
                    lineEndingInfo.macintoshLFCount++;
                }
            }
            else
            {
                Debug.Assert(vector[nextStart] == (byte)'\n');
                lineEndingInfo.unixLFCount++;
            }
            nextStart++;
        }

        currentSkipNumLines++;
        currentSkipCharLength += nextStart - currentOffset;
        if (currentSkipNumLines > LineSkipMap.Sparseness)
        {
            // Flush the accumulated run to the skip map and start a new one.
            lineSkipMap.BulkLinesInserted(currentSkipStartLine, currentSkipNumLines, currentSkipCharOffset, currentSkipCharLength);
            currentSkipStartLine += currentSkipNumLines;
            currentSkipNumLines = 0;
            currentSkipCharOffset += currentSkipCharLength;
            currentSkipCharLength = 0;
        }

        currentLine++;
        totalLines++;
        currentOffset = nextStart;
    }
    if (currentSkipNumLines != 0)
    {
        lineSkipMap.BulkLinesInserted(currentSkipStartLine, currentSkipNumLines, currentSkipCharOffset, currentSkipCharLength);
    }

    if (lineEndingCount == totalLines)
    {
        // file ends with blank line
        totalLines++;
    }
    else
    {
        // last line was unterminated - back it out
        lineSkipMap.LineRemoved(currentLine, -suffixLength);
        lineSkipMap.LineLengthChanged(currentLine, suffixLength);
        currentOffset += suffixLength;
    }

    if (EnableValidate)
    {
        Validate();
    }
}
/// <summary>
/// Reads all text from <paramref name="stream"/>, decodes it with <paramref name="encoding"/>,
/// splits it into lines at CR LF, lone CR, or lone LF, and stores each line in a new text storage.
/// </summary>
/// <param name="stream">Stream that is read until <c>Read</c> returns 0.</param>
/// <param name="encoding">Encoding whose decoder converts the stream bytes to characters.</param>
/// <param name="lineEndingInfo">Receives counts of CR LF, lone CR, and lone LF line endings.</param>
/// <returns>The populated storage, with its <c>modified</c> flag cleared.</returns>
public virtual ITextStorage FromStream(
    Stream stream,
    Encoding encoding,
    out LineEndingInfo lineEndingInfo)
{
    lineEndingInfo = new LineEndingInfo();

    TextStorage text = NewStorage();

    Decoder decoder = encoding.GetDecoder();
    byte[] bytes = new byte[4096]; // (an odd size is handy here when testing continuations)
    int usedBytes = 0;
    char[] chars = new char[4096];
    int usedChars = 0;

    // Characters of the line currently being assembled; grown on demand.
    char[] currentLine = new char[128];
    int currentLineIndex = 0;

    int index = 0;

    while (true)
    {
        int readBytes = stream.Read(bytes, usedBytes, bytes.Length - usedBytes);
        usedBytes += readBytes;

        // if there is an incomplete multi-byte sequence at the end of the file, Convert()
        // will eat it without reporting.
        int usedBytes1, usedChars1;
        bool completed;
        decoder.Convert(
            bytes,
            0,
            usedBytes,
            chars,
            usedChars,
            chars.Length - usedChars,
            readBytes == 0/*flush*/,
            out usedBytes1,
            out usedChars1,
            out completed);
        // Keep any bytes the decoder did not consume at the front of the buffer.
        Array.Copy(bytes, usedBytes1, bytes, 0, usedBytes - usedBytes1);
        usedBytes -= usedBytes1;
        usedChars += usedChars1;

        // True once the stream is exhausted and every byte has been handed to the decoder.
        bool endOfInput = (readBytes == 0) && (usedBytes == 0);

        int i;
        for (i = 0; i < usedChars; i++)
        {
            bool lineBreak = false;
            if (chars[i] == '\r')
            {
                if (!(i + 1 < usedChars))
                {
                    // '\r' is the last decoded character. It can only be classified once we
                    // know whether a '\n' follows, so it is final only at end of input;
                    // otherwise leave it in the buffer and look again after the next read.
                    // (Deciding early would split a CR LF pair that straddles two reads.)
                    if (endOfInput)
                    {
                        lineBreak = true;
                        lineEndingInfo.macintoshLFCount++;
                    }
                    else
                    {
                        break;
                    }
                }
                else
                {
                    lineBreak = true;
                    if (chars[i + 1] == '\n')
                    {
                        i++;
                        lineEndingInfo.windowsLFCount++;
                    }
                    else
                    {
                        lineEndingInfo.macintoshLFCount++;
                    }
                }
            }
            else if (chars[i] == '\n')
            {
                lineBreak = true;
                lineEndingInfo.unixLFCount++;
            }

            if (lineBreak)
            {
                text.Insert(index, Encode(currentLine, 0, currentLineIndex));
                currentLineIndex = 0;
                index++;
            }
            else
            {
                if (currentLine.Length == currentLineIndex)
                {
                    Array.Resize(ref currentLine, currentLine.Length * 2);
                }
                currentLine[currentLineIndex++] = chars[i];
            }
        }
        // Carry any unprocessed characters (at most a deferred '\r') over to the next pass.
        Array.Copy(chars, i, chars, 0, usedChars - i);
        usedChars -= i;

        if ((usedBytes == 0) && (readBytes == 0) && (usedChars == 0))
        {
            break;
        }
    }

    // The text after the final line break goes into the last line of the storage
    // (the assert expects exactly one line beyond those inserted above).
    Debug.Assert(index == text.GetLineCount() - 1);
    text.SetLine(index, Encode(currentLine, 0, currentLineIndex));

    text.modified = false;

    return text;
}