/// <summary>
/// Scans <paramref name="source"/> from start to end and records the position and length of every
/// line break reported by <c>TextUtilities.LengthOfLineBreak</c>.
/// </summary>
/// <param name="source">The text to scan.</param>
/// <returns>The recorded line breaks, or <c>Empty</c> when no line break was found.</returns>
public static ILineBreaks CreateLineBreaks(string source)
{
    // The editor is only created once the first line break is seen, so text without
    // any line break never allocates one.
    ILineBreaksEditor editor = null;

    int position = 0;
    while (position < source.Length)
    {
        int length = TextUtilities.LengthOfLineBreak(source, position, source.Length);
        if (length != 0)
        {
            if (editor == null)
            {
                editor = LineBreakManager.CreateLineBreakEditor(source.Length);
            }

            editor.Add(position, length);

            // Step over the whole break (it may be more than one character long).
            position += length;
        }
        else
        {
            ++position;
        }
    }

    return editor ?? Empty;
}
/// <summary>
/// Merges two string rebuilders into one, copying their characters into a single array and reusing
/// the line breaks each of them already exposes instead of rescanning the combined text.
/// </summary>
/// <param name="left">The rebuilder supplying the leading characters (asserted to be non-empty).</param>
/// <param name="right">The rebuilder supplying the trailing characters (asserted to be non-empty).</param>
/// <returns>
/// The rebuilder produced by <c>StringRebuilderForChars.Create</c> for the characters of
/// <paramref name="left"/> followed by those of <paramref name="right"/>.
/// </returns>
public static StringRebuilder Consolidate(StringRebuilder left, StringRebuilder right)
{
    Debug.Assert(left.Length > 0);
    Debug.Assert(right.Length > 0);

    int combinedLength = left.Length + right.Length;
    char[] combined = new char[combinedLength];
    left.CopyTo(0, combined, 0, left.Length);
    right.CopyTo(0, combined, left.Length, right.Length);

    ILineBreaks lineBreaks;
    if ((left.LineBreakCount != 0) || (right.LineBreakCount != 0))
    {
        ILineBreaksEditor editor = LineBreakManager.CreateLineBreakEditor(combinedLength, left.LineBreakCount + right.LineBreakCount);

        // A '\r' ending the left text followed by a '\n' starting the right text forms a single
        // two-character break across the seam. In that case the last break of the left side and
        // the first break of the right side are skipped and replaced by one combined entry.
        bool seamIsCrLf = (combined[left.Length] == '\n') && (combined[left.Length - 1] == '\r');
        int skipped = seamIsCrLf ? 1 : 0;

        Span extent;
        int lineBreakLength;

        int leftBreaks = left.LineBreakCount - skipped;
        for (int line = 0; line < leftBreaks; ++line)
        {
            left.GetLineFromLineNumber(line, out extent, out lineBreakLength);
            editor.Add(extent.End, lineBreakLength);
        }

        if (seamIsCrLf)
        {
            editor.Add(left.Length - 1, 2);
        }

        for (int line = skipped; line < right.LineBreakCount; ++line)
        {
            right.GetLineFromLineNumber(line, out extent, out lineBreakLength);

            // Positions from the right side are shifted by the length of the left side.
            editor.Add(extent.End + left.Length, lineBreakLength);
        }

        lineBreaks = editor;
    }
    else
    {
        // Neither side has a line break, so the combined text has none either.
        lineBreaks = LineBreakManager.Empty;
    }

    return StringRebuilderForChars.Create(combined, combinedLength, lineBreaks);
}
/// <summary>
/// Creates a string rebuilder for <paramref name="text"/>.
/// </summary>
/// <param name="text">The text to wrap; must not be null.</param>
/// <returns>
/// <c>StringRebuilder.Empty</c> when <paramref name="text"/> is empty; otherwise the result of
/// <c>StringRebuilderForString.Create</c> using the line breaks computed by
/// <c>LineBreakManager.CreateLineBreaks</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static StringRebuilder Create(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

#if DEBUG
    // Debug-only accounting of how many characters have been handed to this method.
    Interlocked.Add(ref _totalCharactersScanned, text.Length);
#endif

    if (text.Length == 0)
    {
        return StringRebuilder.Empty;
    }

    return StringRebuilderForString.Create(text, text.Length, LineBreakManager.CreateLineBreaks(text));
}
// Scans the first `length` characters of `buffer`, recording every line break and updating the
// caller's running state: newline kinds seen, the leading character of each line, the length of the
// line currently being scanned and the longest line seen so far.
//
// Performance hack (this is a hot path): instead of testing both a "throw on invalid characters"
// flag and c == '\0' for every character, the caller passes thresholdForInvalidCharacters as
//   '\u0001' to throw when a NUL character is encountered, or
//   '\0'     to accept every character (no char compares less than '\0').
private static ILineBreaks ParseBlock(char[] buffer, int length, char thresholdForInvalidCharacters,
                                      ref NewlineState newlineState, ref LeadingWhitespaceState leadingWhitespaceState,
                                      ref int currentLineLength, ref int longestLineLength,
                                      ref bool nextCharIsStartOfLine)
{
    // Note that the editor created here will (internally) use the pooled list of line breaks.
    IPooledLineBreaksEditor editor = LineBreakManager.CreatePooledLineBreakEditor(length);

    int position = 0;
    while (position < length)
    {
        int breakLength = TextUtilities.LengthOfLineBreak(buffer, position, length);
        if (breakLength != 0)
        {
            editor.Add(position, breakLength);

            // The line that just ended is a candidate for the longest line.
            longestLineLength = Math.Max(longestLineLength, currentLineLength);
            currentLineLength = 0;

            if (breakLength == 2)
            {
                newlineState.Increment(NewlineState.LineEnding.CRLF, 1);
            }
            else
            {
                // This code needs to be kept consistent with TextUtilities.LengthOfLineBreak()
                switch (buffer[position])
                {
                    case '\r':
                        newlineState.Increment(NewlineState.LineEnding.CR, 1);
                        break;

                    case '\n':
                        newlineState.Increment(NewlineState.LineEnding.LF, 1);
                        break;

                    case '\u0085':
                        newlineState.Increment(NewlineState.LineEnding.NEL, 1);
                        break;

                    case '\u2028':
                        newlineState.Increment(NewlineState.LineEnding.LS, 1);
                        break;

                    case '\u2029':
                        newlineState.Increment(NewlineState.LineEnding.PS, 1);
                        break;

                    default:
                        throw new InvalidOperationException("Unexpected line ending");
                }
            }

            // A break seen while still waiting for the first character of a line means the line was empty.
            if (nextCharIsStartOfLine)
            {
                leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Empty, 1);
            }

            nextCharIsStartOfLine = true;
            position += breakLength;
        }
        else
        {
            char c = buffer[position];

            // If we are checking for invalid characters, throw if we encounter a \0
            if (c < thresholdForInvalidCharacters)
            {
                throw new FileFormatException("File contains NUL characters");
            }

            ++currentLineLength;
            ++position;

            // Only the first character of each line is classified.
            if (nextCharIsStartOfLine)
            {
                if (c == ' ')
                {
                    leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Space, 1);
                }
                else if (c == '\t')
                {
                    leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Tab, 1);
                }
                else
                {
                    leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Printable, 1);
                }

                nextCharIsStartOfLine = false;
            }
        }
    }

    // Called once the whole block has been scanned, before the result is handed back.
    editor.ReleasePooledLineBreaks();

    return editor;
}
/// <summary>
/// Reads all text from <paramref name="reader"/> block by block and appends each block to a
/// string rebuilder, tracking line-ending consistency and the longest line along the way.
/// </summary>
/// <param name="reader">The reader the blocks are loaded from.</param>
/// <param name="fileSize">
/// Compared against <c>TextModelOptions.CompressedStorageFileSizeThreshold</c> to decide whether
/// compressed string rebuilders are used.
/// </param>
/// <param name="id">Not used by the body of this method.</param>
/// <param name="hasConsistentLineEndings">
/// Set to <c>false</c> when the tracked line-ending state ended up as <c>LineEndingState.Inconsistent</c>.
/// </param>
/// <param name="longestLineLength">Set to the length of the longest line encountered.</param>
/// <param name="blockSize">
/// Size of the read buffer; 0 selects <c>TextModelOptions.CompressedStoragePageSize</c> when
/// compressing and <c>TextImageLoader.BlockSize</c> otherwise.
/// </param>
/// <param name="minCompressedBlockSize">
/// When compressing, a block is stored compressed only if more than this many characters were read into it.
/// </param>
/// <returns>The rebuilder holding everything that was read.</returns>
// Exposed for unit tests
internal static StringRebuilder Load(TextReader reader, long fileSize, string id,
                                     out bool hasConsistentLineEndings, out int longestLineLength,
                                     int blockSize = 0,
                                     int minCompressedBlockSize = TextImageLoader.BlockSize)
{
    LineEndingState lineEnding = LineEndingState.Unknown;
    int currentLineLength = 0;
    longestLineLength = 0;

    bool compress = (fileSize >= TextModelOptions.CompressedStorageFileSizeThreshold);
    if (blockSize == 0)
    {
        if (compress)
        {
            blockSize = TextModelOptions.CompressedStoragePageSize;
        }
        else
        {
            blockSize = TextImageLoader.BlockSize;
        }
    }

    PageManager pageManager = null;
    char[] buffer;
    if (!compress)
    {
        buffer = TextImageLoader.AcquireBuffer(blockSize);
    }
    else
    {
        pageManager = new PageManager();
        buffer = new char[blockSize];
    }

    StringRebuilder content = StringRebuilderForChars.Empty;
    try
    {
        int read;
        while ((read = TextImageLoader.LoadNextBlock(reader, buffer)) != 0)
        {
            var lineBreaks = LineBreakManager.CreateLineBreakEditor(read);
            TextImageLoader.ParseBlock(buffer, read, lineBreaks, ref lineEnding, ref currentLineLength, ref longestLineLength);

            char[] chunk;
            if (read >= (buffer.Length / 2))
            {
                // Most of the buffer is in use: hand it over to the rebuilder as-is and
                // allocate a new block for the next chunk.
                chunk = buffer;
                buffer = new char[blockSize];
            }
            else
            {
                // Far fewer characters were read than the buffer holds: copy them into a
                // right-sized array and keep reusing the original buffer.
                chunk = new char[read];
                Array.Copy(buffer, chunk, read);
            }

            bool storeCompressed = compress && (read > minCompressedBlockSize);
            var newContent = storeCompressed
                                ? StringRebuilderForCompressedChars.Create(new Page(pageManager, chunk, read), lineBreaks)
                                : StringRebuilderForChars.Create(chunk, read, lineBreaks);

            content = content.Insert(content.Length, newContent);
        }

        // The final line may not end with a line break, so account for it here.
        longestLineLength = Math.Max(longestLineLength, currentLineLength);
        hasConsistentLineEndings = (lineEnding != LineEndingState.Inconsistent);
    }
    finally
    {
        // Only buffers used on the uncompressed path are handed to ReleaseBuffer.
        if (!compress)
        {
            TextImageLoader.ReleaseBuffer(buffer);
        }
    }

    return content;
}
// Scans the first `length` characters of `buffer`, recording every line break and updating the
// caller's running state: the line-ending kind seen so far (or Inconsistent once two different
// kinds have been seen), the length of the line currently being scanned and the longest line so far.
private static ILineBreaks ParseBlock(char[] buffer, int length, ref LineEndingState lineEnding,
                                      ref int currentLineLength, ref int longestLineLength)
{
    // Note that the editor created here will (internally) use the pooled list of line breaks.
    IPooledLineBreaksEditor editor = LineBreakManager.CreatePooledLineBreakEditor(length);

    int position = 0;
    while (position < length)
    {
        int breakLength = TextUtilities.LengthOfLineBreak(buffer, position, length);
        if (breakLength == 0)
        {
            ++currentLineLength;
            ++position;
            continue;
        }

        editor.Add(position, breakLength);

        // The line that just ended is a candidate for the longest line.
        longestLineLength = Math.Max(longestLineLength, currentLineLength);
        currentLineLength = 0;

        // Once the endings are known to be inconsistent there is nothing left to classify.
        if (lineEnding != LineEndingState.Inconsistent)
        {
            LineEndingState observed;
            if (breakLength == 2)
            {
                observed = LineEndingState.CRLF;
            }
            else
            {
                // This code needs to be kept consistent with TextUtilities.LengthOfLineBreak()
                switch (buffer[position])
                {
                    case '\r':
                        observed = LineEndingState.CR;
                        break;

                    case '\n':
                        observed = LineEndingState.LF;
                        break;

                    case '\u0085':
                        observed = LineEndingState.NEL;
                        break;

                    case '\u2028':
                        observed = LineEndingState.LS;
                        break;

                    case '\u2029':
                        observed = LineEndingState.PS;
                        break;

                    default:
                        throw new InvalidOperationException("Unexpected line ending");
                }
            }

            // The first ending seen sets the state; any later ending that differs makes it inconsistent.
            if (lineEnding == LineEndingState.Unknown)
            {
                lineEnding = observed;
            }
            else if (lineEnding != observed)
            {
                lineEnding = LineEndingState.Inconsistent;
            }
        }

        position += breakLength;
    }

    // Called once the whole block has been scanned, before the result is handed back.
    editor.ReleasePooledLineBreaks();

    return editor;
}