/// <summary>
/// Scans <paramref name="source"/> from start to end and records the position and length of every line break
/// reported by <c>TextUtilities.LengthOfLineBreak</c>.
/// </summary>
/// <param name="source">The text to scan.</param>
/// <returns>The recorded line breaks, or <c>Empty</c> when the text contains none.</returns>
public static ILineBreaks CreateLineBreaks(string source)
        {
            // Allocated on first use so that text without any line break never creates an editor.
            ILineBreaksEditor editor = null;

            int end = source.Length;
            int position = 0;

            while (position < end)
            {
                int lengthOfBreak = TextUtilities.LengthOfLineBreak(source, position, end);
                if (lengthOfBreak != 0)
                {
                    if (editor == null)
                    {
                        editor = LineBreakManager.CreateLineBreakEditor(end);
                    }

                    editor.Add(position, lengthOfBreak);

                    // Step over the whole break so a two-character break is only recorded once.
                    position += lengthOfBreak;
                }
                else
                {
                    // Ordinary character: move on to the next one.
                    position++;
                }
            }

            if (editor == null)
            {
                return Empty;
            }

            return editor;
        }
        /// <summary>
        /// Merges two string rebuilders into a single character-array-backed rebuilder. The line breaks already known
        /// to each side are reused, so the combined text is not rescanned.
        /// </summary>
        /// <param name="left">The rebuilder supplying the leading characters (asserted non-empty in debug builds).</param>
        /// <param name="right">The rebuilder supplying the trailing characters (asserted non-empty in debug builds).</param>
        /// <returns>A rebuilder over the concatenated characters of <paramref name="left"/> and <paramref name="right"/>.</returns>
        public static StringRebuilder Consolidate(StringRebuilder left, StringRebuilder right)
        {
            Debug.Assert(left.Length > 0);
            Debug.Assert(right.Length > 0);

            int leftLength  = left.Length;
            int rightLength = right.Length;
            int totalLength = leftLength + rightLength;

            // Lay the two pieces out back to back in one array.
            char[] combined = new char[totalLength];
            left.CopyTo(0, combined, 0, leftLength);
            right.CopyTo(0, combined, leftLength, rightLength);

            int leftBreakCount  = left.LineBreakCount;
            int rightBreakCount = right.LineBreakCount;

            ILineBreaks lineBreaks;

            if ((leftBreakCount != 0) || (rightBreakCount != 0))
            {
                ILineBreaksEditor editor = LineBreakManager.CreateLineBreakEditor(totalLength, leftBreakCount + rightBreakCount);

                // A '\r' ending the left piece followed by a '\n' starting the right piece must become a single
                // two-character break instead of the two one-character breaks the pieces recorded separately.
                bool seamIsCrLf = (combined[leftLength] == '\n') && (combined[leftLength - 1] == '\r');

                Span extent;
                int  breakLength;

                // Left breaks keep their positions; the final one is skipped when it is the seam's '\r'.
                int leftBreaksToCopy = seamIsCrLf ? (leftBreakCount - 1) : leftBreakCount;
                for (int line = 0; line < leftBreaksToCopy; ++line)
                {
                    left.GetLineFromLineNumber(line, out extent, out breakLength);
                    editor.Add(extent.End, breakLength);
                }

                if (seamIsCrLf)
                {
                    editor.Add(leftLength - 1, 2);
                }

                // Right breaks are shifted by the left length; the first one is skipped when it is the seam's '\n'.
                int firstRightBreak = seamIsCrLf ? 1 : 0;
                for (int line = firstRightBreak; line < rightBreakCount; ++line)
                {
                    right.GetLineFromLineNumber(line, out extent, out breakLength);
                    editor.Add(extent.End + leftLength, breakLength);
                }

                lineBreaks = editor;
            }
            else
            {
                // Neither side has a line break, so the shared empty instance is enough.
                lineBreaks = LineBreakManager.Empty;
            }

            return StringRebuilderForChars.Create(combined, totalLength, lineBreaks);
        }
        /// <summary>
        /// Creates a string rebuilder over <paramref name="text"/>, computing its line breaks up front.
        /// </summary>
        /// <param name="text">The text to wrap; must not be null.</param>
        /// <returns><c>StringRebuilder.Empty</c> for an empty string, otherwise a string-backed rebuilder.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public static StringRebuilder Create(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

#if DEBUG
            // Debug-only bookkeeping of how many characters have been scanned.
            Interlocked.Add(ref _totalCharactersScanned, text.Length);
#endif

            if (text.Length == 0)
            {
                return StringRebuilder.Empty;
            }

            return StringRebuilderForString.Create(text, text.Length, LineBreakManager.CreateLineBreaks(text));
        }
        // Evil performance hack (but we are on a hot path here):
        //  thresholdForInvalidCharacters should be '\u0001' if we are throwing on invalid characters.
        //                                should be '\0' if we are not.
        // (otherwise we need to check both a throwOnInvalidCharacters boolean and that c == 0).

        /// <summary>
        /// Scans the first <paramref name="length"/> characters of <paramref name="buffer"/>, recording every line
        /// break found and updating the caller's running statistics as it goes.
        /// </summary>
        /// <param name="buffer">The characters to scan.</param>
        /// <param name="length">The number of characters, counted from the start of <paramref name="buffer"/>, to scan.</param>
        /// <param name="thresholdForInvalidCharacters">Any non-line-break character strictly less than this value is
        /// rejected with a <see cref="FileFormatException"/>.</param>
        /// <param name="newlineState">Receives one <c>Increment</c> call per line break, keyed by the kind of break.</param>
        /// <param name="leadingWhitespaceState">Receives one <c>Increment</c> call per line start: Space, Tab or Printable
        /// for the first non-break character of a line, or Empty when a line break is met while still at a line start.</param>
        /// <param name="currentLineLength">Count of non-break characters seen since the last line break; reset to 0 at
        /// each break. Not reset on entry, so a value passed in is continued.</param>
        /// <param name="longestLineLength">Raised to <paramref name="currentLineLength"/> at each line break when that is larger.
        /// Text after the last break is not folded in here.</param>
        /// <param name="nextCharIsStartOfLine">True when the next character begins a line; set to true after every line
        /// break and to false after the first non-break character that follows.</param>
        /// <returns>The editor holding the (position, length) of each line break added during the scan.</returns>
        /// <exception cref="FileFormatException">A non-break character is below <paramref name="thresholdForInvalidCharacters"/>.</exception>
        /// <exception cref="InvalidOperationException">A one-character line break is not one of the characters handled below.</exception>
        private static ILineBreaks ParseBlock(char[] buffer, int length, char thresholdForInvalidCharacters,
                                              ref NewlineState newlineState,
                                              ref LeadingWhitespaceState leadingWhitespaceState,
                                              ref int currentLineLength,
                                              ref int longestLineLength,
                                              ref bool nextCharIsStartOfLine)
        {
            // Note that the lineBreaks created here will (internally) use the pooled list of line breaks.
            IPooledLineBreaksEditor lineBreaks = LineBreakManager.CreatePooledLineBreakEditor(length);

            int index = 0;

            while (index < length)
            {
                int breakLength = TextUtilities.LengthOfLineBreak(buffer, index, length);
                if (breakLength == 0)
                {
                    // Not a line break: an ordinary character that belongs to the current line.
                    char c = buffer[index];

                    // If we are checking for invalid characters, throw if we encounter a \0
                    // (the check runs before any of the ref state is touched for this character).
                    if (c < thresholdForInvalidCharacters)
                    {
                        throw new FileFormatException("File contains NUL characters");
                    }

                    ++currentLineLength;
                    ++index;

                    // Only the first character of each line is classified.
                    if (nextCharIsStartOfLine)
                    {
                        switch (c)
                        {
                        case ' ':
                            leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Space, 1);
                            break;

                        case '\t':
                            leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Tab, 1);
                            break;

                        default:
                            leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Printable, 1);
                            break;
                        }

                        nextCharIsStartOfLine = false;
                    }
                }
                else
                {
                    // A line break starts at index: record it and close out the current line's length.
                    lineBreaks.Add(index, breakLength);
                    longestLineLength = Math.Max(longestLineLength, currentLineLength);
                    currentLineLength = 0;


                    if (breakLength == 2)
                    {
                        // A two-character break is counted as CRLF.
                        newlineState.Increment(NewlineState.LineEnding.CRLF, 1);
                    }
                    else
                    {
                        switch (buffer[index])
                        {
                        // This code needs to be kept consistent with TextUtilities.LengthOfLineBreak()
                        case '\r': newlineState.Increment(NewlineState.LineEnding.CR, 1); break;

                        case '\n': newlineState.Increment(NewlineState.LineEnding.LF, 1); break;

                        case '\u0085': newlineState.Increment(NewlineState.LineEnding.NEL, 1); break;

                        case '\u2028': newlineState.Increment(NewlineState.LineEnding.LS, 1); break;

                        case '\u2029': newlineState.Increment(NewlineState.LineEnding.PS, 1); break;

                        default: throw new InvalidOperationException("Unexpected line ending");
                        }
                    }

                    // A break met while still at a line start means the line had no characters at all.
                    if (nextCharIsStartOfLine)
                    {
                        leadingWhitespaceState.Increment(LeadingWhitespaceState.LineLeadingCharacter.Empty, 1);
                    }

                    nextCharIsStartOfLine = true;
                }

                // breakLength is 0 on the non-break path (index was already advanced there),
                // so this only has an effect when stepping over a line break.
                index += breakLength;
            }

            // NOTE(review): presumably hands the pooled backing list back while leaving the recorded breaks
            // readable through the returned object -- confirm against IPooledLineBreaksEditor.
            lineBreaks.ReleasePooledLineBreaks();

            return(lineBreaks);
        }
        /// <summary>
        /// Reads all text from <paramref name="reader"/> block by block and assembles it into a single string rebuilder,
        /// tracking line-ending consistency and the longest line along the way.
        /// </summary>
        /// <param name="reader">The source of the text.</param>
        /// <param name="fileSize">Size used to decide whether compressed storage is used (compared against
        /// <c>TextModelOptions.CompressedStorageFileSizeThreshold</c>).</param>
        /// <param name="id">Not used by this method.</param>
        /// <param name="hasConsistentLineEndings">Set to false only when the line-ending state ends up Inconsistent.</param>
        /// <param name="longestLineLength">Set to the length of the longest line seen, including text after the last break.</param>
        /// <param name="blockSize">Block size in characters; 0 selects a default based on whether compressed storage is used.</param>
        /// <param name="minCompressedBlockSize">Blocks must contain more characters than this to be stored compressed.</param>
        /// <returns>A rebuilder holding everything read from <paramref name="reader"/>.</returns>
        internal static StringRebuilder Load(TextReader reader, long fileSize, string id,
                                             out bool hasConsistentLineEndings, out int longestLineLength,
                                             int blockSize = 0,
                                             int minCompressedBlockSize = TextImageLoader.BlockSize)  // Exposed for unit tests
        {
            longestLineLength = 0;

            int             runningLineLength = 0;
            LineEndingState endingState       = LineEndingState.Unknown;

            bool compress = (fileSize >= TextModelOptions.CompressedStorageFileSizeThreshold);

            if (blockSize == 0)
            {
                if (compress)
                {
                    blockSize = TextModelOptions.CompressedStoragePageSize;
                }
                else
                {
                    blockSize = TextImageLoader.BlockSize;
                }
            }

            PageManager pages = null;
            char[]      buffer;

            if (!compress)
            {
                // This buffer comes from TextImageLoader and is handed back in the finally block below.
                buffer = TextImageLoader.AcquireBuffer(blockSize);
            }
            else
            {
                pages  = new PageManager();
                buffer = new char[blockSize];
            }

            StringRebuilder result = StringRebuilderForChars.Empty;

            try
            {
                int read;
                while ((read = TextImageLoader.LoadNextBlock(reader, buffer)) != 0)
                {
                    var blockBreaks = LineBreakManager.CreateLineBreakEditor(read);
                    TextImageLoader.ParseBlock(buffer, read, blockBreaks, ref endingState, ref runningLineLength, ref longestLineLength);

                    char[] blockChars;
                    if (read >= (buffer.Length / 2))
                    {
                        // The block fills most of the buffer: give the buffer itself to the rebuilder
                        // and allocate a fresh one for the next read.
                        blockChars = buffer;
                        buffer     = new char[blockSize];
                    }
                    else
                    {
                        // Far fewer characters than the buffer holds: copy them out and keep reading into the same buffer.
                        blockChars = new char[read];
                        Array.Copy(buffer, blockChars, read);
                    }

                    var piece = (compress && (read > minCompressedBlockSize))
                                ? StringRebuilderForCompressedChars.Create(new Page(pages, blockChars, read), blockBreaks)
                                : StringRebuilderForChars.Create(blockChars, read, blockBreaks);

                    result = result.Insert(result.Length, piece);
                }

                hasConsistentLineEndings = (endingState != LineEndingState.Inconsistent);

                // Account for any text that follows the last line break.
                longestLineLength = Math.Max(longestLineLength, runningLineLength);
            }
            finally
            {
                if (!compress)
                {
                    TextImageLoader.ReleaseBuffer(buffer);
                }
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Scans the first <paramref name="length"/> characters of <paramref name="buffer"/>, recording each line break
        /// and updating the caller's line-ending and line-length tracking.
        /// </summary>
        /// <param name="buffer">The characters to scan.</param>
        /// <param name="length">The number of characters, counted from the start of <paramref name="buffer"/>, to scan.</param>
        /// <param name="lineEnding">Running line-ending state: takes the kind of the first break seen and becomes
        /// Inconsistent as soon as a different kind is seen; once Inconsistent it is left alone.</param>
        /// <param name="currentLineLength">Count of non-break characters since the last break; reset to 0 at each break.</param>
        /// <param name="longestLineLength">Raised to <paramref name="currentLineLength"/> at each break when that is larger.</param>
        /// <returns>The editor holding the line breaks found in the block.</returns>
        /// <exception cref="InvalidOperationException">A one-character line break is not a recognised line-ending character.</exception>
        private static ILineBreaks ParseBlock(char[] buffer, int length,
                                              ref LineEndingState lineEnding, ref int currentLineLength, ref int longestLineLength)
        {
            // Note that the editor created here will (internally) use the pooled list of line breaks.
            IPooledLineBreaksEditor breaks = LineBreakManager.CreatePooledLineBreakEditor(length);

            for (int position = 0; position < length; )
            {
                int lengthOfBreak = TextUtilities.LengthOfLineBreak(buffer, position, length);
                if (lengthOfBreak == 0)
                {
                    // Ordinary character: it extends the current line.
                    ++currentLineLength;
                    ++position;
                    continue;
                }

                breaks.Add(position, lengthOfBreak);
                longestLineLength = Math.Max(longestLineLength, currentLineLength);
                currentLineLength = 0;

                // Once the endings are known to be inconsistent there is nothing more to learn from them.
                if (lineEnding != LineEndingState.Inconsistent)
                {
                    LineEndingState observed;
                    if (lengthOfBreak == 2)
                    {
                        observed = LineEndingState.CRLF;
                    }
                    else
                    {
                        switch (buffer[position])
                        {
                        // This code needs to be kept consistent with TextUtilities.LengthOfLineBreak()
                        case '\r':     observed = LineEndingState.CR;  break;
                        case '\n':     observed = LineEndingState.LF;  break;
                        case '\u0085': observed = LineEndingState.NEL; break;
                        case '\u2028': observed = LineEndingState.LS;  break;
                        case '\u2029': observed = LineEndingState.PS;  break;

                        default: throw new InvalidOperationException("Unexpected line ending");
                        }
                    }

                    if (lineEnding == LineEndingState.Unknown)
                    {
                        // First break seen: adopt its kind.
                        lineEnding = observed;
                    }
                    else if (lineEnding != observed)
                    {
                        lineEnding = LineEndingState.Inconsistent;
                    }
                }

                position += lengthOfBreak;
            }

            breaks.ReleasePooledLineBreaks();

            return breaks;
        }