Пример #1
0
        /// <summary>
        ///  Return the portion of 'text' which lies between the first occurrence of
        ///  'prefix' and the first occurrence of 'suffix' found after that prefix.
        /// </summary>
        /// <param name="text">Text to extract from</param>
        /// <param name="prefix">Marker which precedes the desired content</param>
        /// <param name="suffix">Marker which follows the desired content</param>
        /// <returns>
        ///  The whole text if the prefix isn't found; empty if nothing follows the prefix;
        ///  everything after the prefix if the suffix isn't found; otherwise the content
        ///  between the two markers.
        /// </returns>
        private String8 Between(String8 text, String8 prefix, String8 suffix)
        {
            int prefixStart = text.IndexOf(prefix);
            if (prefixStart == -1)
            {
                // No prefix at all: nothing to trim
                return text;
            }

            int contentStart = prefixStart + prefix.Length;
            if (contentStart >= text.Length)
            {
                // The prefix is the very end of the text, so nothing follows it
                return String8.Empty;
            }

            // Only look for the suffix after the end of the prefix
            int contentEnd = text.IndexOf(suffix, contentStart);

            return (contentEnd == -1)
                ? text.Substring(contentStart)
                : text.Substring(contentStart, contentEnd - contentStart);
        }
Пример #2
0
        /// <summary>
        ///  Write 'value' to 'writer' as a single value, leaving out every span which
        ///  starts at a '&lt;' and runs through the next '&gt;'. If a '&lt;' has no
        ///  closing '&gt;', everything from that '&lt;' onward is dropped.
        /// </summary>
        /// <param name="value">UTF8 value to write</param>
        /// <param name="writer">Writer to receive the value, in one or more parts</param>
        public static void WriteHtmlEscaped(String8 value, ITabularWriter writer)
        {
            writer.WriteValueStart();

            int position = 0;
            int tagStart;

            // Each pass handles one tag: emit the text before it, then skip past it
            while ((tagStart = value.IndexOf((byte)'<', position)) != -1)
            {
                writer.WriteValuePart(value.Substring(position, tagStart - position));

                int tagEnd = value.IndexOf((byte)'>', tagStart + 1);
                if (tagEnd == -1)
                {
                    // Unclosed tag: discard the rest of the value
                    position = value.Length;
                    break;
                }

                position = tagEnd + 1;
            }

            // Emit whatever follows the last tag (empty after an unclosed tag)
            writer.WriteValuePart(value.Substring(position));

            writer.WriteValueEnd();
        }
Пример #3
0
        public void String8_ShiftBack()
        {
            String8Block copies = new String8Block();

            // Goal: Split on semi-colon, collapse semi-colon and spaces in-place
            String8    source   = "One; Two;Three; Four".TestConvert();
            String8Set segments = source.Split(UTF8.Semicolon, new PartialArray <int>(5, false));

            int shiftSoFar = 0;

            for (int segmentIndex = 0; segmentIndex < segments.Count; ++segmentIndex)
            {
                String8 segment = segments[segmentIndex];

                // Every segment adds one to the shift; a leading space is trimmed and adds one more
                bool hasLeadingSpace = segment.StartsWith(UTF8.Space);
                if (hasLeadingSpace)
                {
                    segment = segment.Substring(1);
                }

                shiftSoFar += (hasLeadingSpace ? 2 : 1);

                // Shifting must not change the content of the segment itself
                String8 expected = copies.GetCopy(segment);
                String8 actual   = segment.ShiftBack(shiftSoFar);
                Assert.AreEqual(expected, actual);
            }

            String8 collapsed = source.Substring(0, source.Length - shiftSoFar);

            // NOTE(review): the shift is also incremented for the first segment (which has no
            // preceding delimiter) and this assertion checks for *inequality* - confirm that
            // both are intended.
            Assert.AreNotEqual("OneTwoThreeFour", collapsed.ToString());
        }
Пример #4
0
        /// <summary>
        ///  NextBlock is called by NextRow before reading the last row in _currentBlock.
        ///  Since the file is read in blocks, the last row is usually incomplete.
        ///
        ///  If there's more file content, NextBlock should copy the last row to the start
        ///  of the buffer, read more content, and reset _currentBlock to the new split rows
        ///  and _nextRowIndexInBlock to zero (telling NextRow to read that row next).
        ///
        ///  If there's no more file, the last row is complete. NextBlock must return
        ///  without changing _currentBlock or _nextRowIndexInBlock to tell NextRow it's safe
        ///  to return to the user.
        ///
        ///  NextRow will call NextBlock *again* after the last row. NextBlock must again
        ///  not change anything to tell NextRow that there's nothing left.
        ///
        ///  So, NextBlock must:
        ///   - Copy the last row to the start of the buffer (if not already there)
        ///   - Read more content to fill the buffer
        ///   - Split the buffer into rows
        ///   - Stop at end-of-file or when a full row was read
        ///   - Double the buffer until one of these conditions is met
        ///
        ///   - Reset _nextRowIndexInBlock *only if* a row was shifted or read
        ///
        ///  End-of-file is detected by the buffer not being full after reading. Because
        ///  Stream.Read may return fewer bytes than requested before the end of the stream,
        ///  the buffer is filled with repeated reads until it is full or Read returns zero.
        /// </summary>
        private void NextBlock()
        {
            int bufferLengthFilledStart = 0;

            // Copy the last row to the start of the buffer (if not already there)
            if (_currentBlock.Count > 1)
            {
                String8 lastRow = _currentBlock[_currentBlock.Count - 1];
                lastRow.WriteTo(_buffer, 0);
                bufferLengthFilledStart = lastRow.Length;

                // Reset the next row to read (since we shifted a row)
                _nextRowIndexInBlock = 0;
            }

            int bufferLengthFilled = bufferLengthFilledStart;

            while (true)
            {
                // Read more content to fill the buffer. A single Read may return less than was
                // asked for without being at end-of-file, so keep reading until the buffer is
                // full or the stream reports no more data. This keeps the "buffer not full
                // means end-of-file" test below valid for every kind of stream.
                int bytesRead;
                do
                {
                    bytesRead = _stream.Read(_buffer, bufferLengthFilled, _buffer.Length - bufferLengthFilled);
                    bufferLengthFilled += bytesRead;
                } while (bytesRead > 0 && bufferLengthFilled < _buffer.Length);

                String8 block = new String8(_buffer, 0, bufferLengthFilled);

                // Strip leading UTF8 BOM, if found, on first block
                // (everything read from the stream so far is in the buffer)
                if (_stream.Position == bufferLengthFilled)
                {
                    if (block.Length >= 3 && block[0] == 0xEF && block[1] == 0xBB && block[2] == 0xBF)
                    {
                        block = block.Substring(3);
                    }
                }

                // Split the buffer into rows
                _currentBlock = _splitRows(block, _rowPositionArray);

                // Stop at end-of-file (reading couldn't fill the buffer)
                if (bufferLengthFilled < _buffer.Length)
                {
                    break;
                }

                // Stop when a full row was read (split found at least two parts)
                if (_currentBlock.Count > 1)
                {
                    break;
                }

                // Otherwise, double the buffer (until a full row or end of file)
                byte[] newBuffer = new byte[_buffer.Length * 2];
                _buffer.CopyTo(newBuffer, 0);
                _buffer = newBuffer;
            }

            // If we read new content, reset the next row to read
            if (bufferLengthFilled > bufferLengthFilledStart)
            {
                _nextRowIndexInBlock = 0;
            }
        }
Пример #5
0
 /// <summary>
 ///  Split a single row into cells using CSV splitting and decoding, after
 ///  removing one trailing carriage return (if present) from the row.
 /// </summary>
 /// <param name="row">Row content to split</param>
 /// <param name="cellPositionArray">Position array passed through to the CSV splitter</param>
 /// <returns>Set of cells found in the row</returns>
 protected override String8Set SplitCells(String8 row, PartialArray <int> cellPositionArray)
 {
     // Drop a trailing '\r' so '\r\n' and '\n' terminated rows split the same way
     String8 withoutCarriageReturn = row.EndsWith(UTF8.CR)
         ? row.Substring(0, row.Length - 1)
         : row;

     return withoutCarriageReturn.SplitAndDecodeCsvCells(cellPositionArray);
 }
Пример #6
0
        /// <summary>
        ///  Return the part of 'text' before the first occurrence of 'value',
        ///  or all of 'text' if 'value' doesn't occur in it.
        /// </summary>
        /// <param name="text">Text to search within</param>
        /// <param name="value">Value to find</param>
        /// <returns>Text before the first match, or the full text when there is no match</returns>
        private String8 BeforeFirst(String8 text, String8 value)
        {
            int matchStart = text.IndexOf(value);

            return (matchStart == -1) ? text : text.Substring(0, matchStart);
        }
Пример #7
0
 /// <summary>
 ///  Limit 'text' to at most 'limit' bytes by keeping only its beginning.
 /// </summary>
 /// <param name="text">Text to truncate</param>
 /// <param name="limit">Maximum length to return</param>
 /// <returns>The text itself when shorter than the limit, otherwise its first 'limit' bytes</returns>
 private String8 Truncate(String8 text, int limit)
 {
     bool alreadyShortEnough = text.Length < limit;

     return alreadyShortEnough ? text : text.Substring(0, limit);
 }
Пример #8
0
        /// <summary>
        ///  Write a cell value to the stream, leaving out any tab or newline bytes.
        ///  There is no standard for TSV escaping, so delimiters within values are omitted.
        /// </summary>
        /// <param name="stream">Stream to write to</param>
        /// <param name="value">UTF8 value to write</param>
        protected override void WriteCellValue(Stream stream, String8 value)
        {
            int length       = value.Length;
            int segmentStart = 0;

            for (int offset = 0; offset < length; ++offset)
            {
                byte current = value.Array[value.Index + offset];

                if (current != UTF8.Tab && current != UTF8.Newline)
                {
                    continue;
                }

                // Flush the run before the delimiter, then resume just after it
                value.Substring(segmentStart, offset - segmentStart).WriteTo(stream);
                segmentStart = offset + 1;
            }

            // Flush whatever follows the last delimiter
            value.Substring(segmentStart).WriteTo(stream);
        }
Пример #9
0
        /// <summary>
        ///  Write a UTF8 value to _stream, escaped for use inside double-quotes.
        ///  The surrounding quotes themselves are not written.
        ///
        ///  Bytes below 32 are written as s_escapedCharPrefix followed by two hex digits.
        ///  Backslash and quote are written with a backslash in front of them.
        ///  Everything else is copied through unchanged.
        /// </summary>
        /// <param name="value">UTF8 value to write</param>
        private void WriteEscaped(String8 value)
        {
            int length       = value.Length;
            int segmentStart = 0;

            for (int offset = 0; offset < length; ++offset)
            {
                byte current       = value.Array[value.Index + offset];
                bool isControlByte = current < 32;

                if (!isControlByte && current != UTF8.Backslash && current != UTF8.Quote)
                {
                    // Plain byte: it is written later as part of an unescaped run
                    continue;
                }

                // Flush the unescaped run which precedes this byte
                value.Substring(segmentStart, offset - segmentStart).WriteTo(_stream);

                if (isControlByte)
                {
                    // Prefix, then the high and low hex digits of the byte
                    s_escapedCharPrefix.WriteTo(_stream);
                    _stream.WriteByte(ToHexDigit(current / 16));
                    _stream.WriteByte(ToHexDigit(current & 0xF));
                }
                else
                {
                    _stream.WriteByte(UTF8.Backslash);
                    _stream.WriteByte(current);
                }

                // The next unescaped run begins after this byte
                segmentStart = offset + 1;
            }

            // Flush the final unescaped run
            value.Substring(segmentStart).WriteTo(_stream);
        }
Пример #10
0
        public void String8_StartsWithEndsWith()
        {
            String8 exact      = "Collections".TestConvert();
            String8 mixedCase  = "coLLecTionS".TestConvert();
            int     exactCount = exact.Length;

            // Single byte checks against an empty value never match
            Assert.IsFalse(String8.Empty.StartsWith(UTF8.Space));
            Assert.IsFalse(String8.Empty.EndsWith(UTF8.Space));

            // Single byte StartsWith is case sensitive
            Assert.IsTrue(exact.StartsWith((byte)'C'));
            Assert.IsFalse(exact.StartsWith((byte)'c'));
            Assert.IsFalse(exact.StartsWith(UTF8.Newline));

            // Single byte EndsWith is case sensitive
            Assert.IsTrue(exact.EndsWith((byte)'s'));
            Assert.IsFalse(exact.EndsWith((byte)'S'));
            Assert.IsFalse(exact.EndsWith(UTF8.Newline));

            // An empty value doesn't start or end with a non-empty value
            Assert.IsFalse(String8.Empty.StartsWith(exact));
            Assert.IsFalse(String8.Empty.EndsWith(exact));
            Assert.IsFalse(String8.Empty.StartsWith(exact, true));
            Assert.IsFalse(String8.Empty.EndsWith(exact, true));

            // EndsWith: whole value, suffixes, and casing with and without ignoreCase
            Assert.IsTrue(exact.EndsWith(exact));
            Assert.IsTrue(exact.EndsWith(exact.Substring(1)));
            Assert.IsTrue(exact.EndsWith(exact.Substring(8)));
            Assert.IsFalse(exact.EndsWith(mixedCase));
            Assert.IsTrue(exact.EndsWith(mixedCase, true));

            // StartsWith: whole value, prefixes, and casing with and without ignoreCase
            Assert.IsTrue(exact.StartsWith(exact));
            Assert.IsTrue(exact.StartsWith(exact.Substring(0, exactCount - 1)));
            Assert.IsTrue(exact.StartsWith(exact.Substring(0, 3)));
            Assert.IsFalse(exact.StartsWith(mixedCase));
            Assert.IsTrue(exact.StartsWith(mixedCase, true));
        }
Пример #11
0
        /// <summary>
        ///  Write part of a value to the stream, doubling every quote byte found in it.
        ///  All other bytes are written unchanged.
        /// </summary>
        /// <param name="stream">Stream to write to</param>
        /// <param name="value">UTF8 value part to write</param>
        protected override void WriteValuePart(Stream stream, String8 value)
        {
            int length       = value._length;
            int segmentStart = 0;

            for (int offset = 0; offset < length; ++offset)
            {
                if (value._buffer[value._index + offset] != UTF8.Quote)
                {
                    continue;
                }

                // Flush the content before the quote. The quote itself stays at the start
                // of the next segment, so it is written when that segment is flushed.
                value.Substring(segmentStart, offset - segmentStart).WriteTo(stream);
                segmentStart = offset;

                // Emit the extra quote which escapes the original one
                stream.WriteByte(UTF8.Quote);
            }

            // Flush the last segment (beginning with the last quote seen, if any)
            value.Substring(segmentStart).WriteTo(stream);
        }
Пример #12
0
        /// <summary>
        ///  Advance the reader to the next row. Call this before reading the
        ///  first row and again before each following row.
        /// </summary>
        /// <returns>True if another row exists, False if the TSV is out of content</returns>
        public bool NextRow()
        {
            // The last row in the block may have been only partially read into the buffer,
            // so ask for more content before using it.
            bool onLastRowOfBlock = _nextRowIndexInBlock >= _currentBlock.Count - 1;
            if (onLastRowOfBlock)
            {
                NextBlock();
            }

            // Nothing left to return
            if (_nextRowIndexInBlock >= _currentBlock.Count)
            {
                return false;
            }

            String8 rowText = _currentBlock[_nextRowIndexInBlock];

            // The very first row may begin with a UTF8 byte order mark (EF BB BF); remove it
            bool isFirstRow = (_rowCountRead == 0);
            if (isFirstRow && rowText.Length >= 3 && rowText[0] == 0xEF && rowText[1] == 0xBB && rowText[2] == 0xBF)
            {
                rowText = rowText.Substring(3);
            }

            // Split the row into cells and move on to the next row
            _currentRow = SplitCells(rowText, _cellPositionArray);

            _rowCountRead++;
            _nextRowIndexInBlock++;

            // Keep a reusable set of String8TabularValues, one per cell, to avoid
            // per-cell-value allocation or boxing. Grow it when a wider row is seen.
            bool needMoreBoxes = (_valueBoxes == null || _valueBoxes.Length < _currentRow.Count);
            if (needMoreBoxes)
            {
                _valueBoxes = new String8TabularValue[_currentRow.Count];

                for (int boxIndex = 0; boxIndex < _valueBoxes.Length; ++boxIndex)
                {
                    _valueBoxes[boxIndex] = new String8TabularValue();
                }
            }

            return true;
        }
Пример #13
0
        /// <summary>
        ///  Return the part of 'text' after the first occurrence of 'value',
        ///  or all of 'text' if 'value' doesn't occur in it.
        /// </summary>
        /// <param name="text">Text to search within</param>
        /// <param name="value">Value to find</param>
        /// <returns>
        ///  The full text when there is no match, empty when the match ends the text,
        ///  otherwise everything after the match.
        /// </returns>
        private String8 AfterFirst(String8 text, String8 value)
        {
            int matchStart = text.IndexOf(value);
            if (matchStart == -1)
            {
                return text;
            }

            int matchEnd = matchStart + value.Length;

            // Nothing remains when the match runs to the end of the text
            return (matchEnd >= text.Length) ? String8.Empty : text.Substring(matchEnd);
        }
 /// <summary>
 ///  Format a match for display as its index, its length, and the matched
 ///  portion of 'content' in single quotes.
 /// </summary>
 /// <param name="match">Match to describe</param>
 /// <param name="content">Content the match's index and length refer to</param>
 /// <returns>Text in the form "(index, length: 'matched text')"</returns>
 private string MatchToString(Match2 match, String8 content)
     => $"({match.Index}, {match.Length}: '{content.Substring(match.Index, match.Length)}')";
Пример #15
0
        /// <summary>
        ///  Move to the next row. A row is read from _reader, and the lines in _blockLines
        ///  starting at _nextLineIndex are consumed as "name: value" properties until an
        ///  empty line, a line at or past the end of the row, or the end of _blockLines.
        ///  Each completed property is passed to SetColumnValue.
        /// </summary>
        /// <returns>True if a row was read, False if the reader returned an empty row</returns>
        public bool NextRow()
        {
            _currentRowBlock.Clear();

            String8 row = _reader.NextRow();

            // An empty row from the underlying reader means there is nothing more to read
            if (row.IsEmpty())
            {
                return(false);
            }

            // Clear values for row
            for (int i = 0; i < _currentRowValues.Length; ++i)
            {
                _currentRowValues[i].SetValue(String8.Empty);
            }

            // Read available complete lines
            // (the property being built is held here until the next property line or the end of the row)
            String8 currentPropertyName  = String8.Empty;
            String8 currentPropertyValue = String8.Empty;
            bool    currentIsBase64      = false;

            for (; _nextLineIndex < _blockLines.Count; ++_nextLineIndex)
            {
                String8 line = _blockLines[_nextLineIndex];

                // Skip comment lines and grouping lines
                if (line.StartsWith(UTF8.Pound) || line.StartsWith(UTF8.Dash))
                {
                    continue;
                }

                // Trim trailing CR, if found
                if (line.EndsWith(UTF8.CR))
                {
                    line = line.Substring(0, line.Length - 1);
                }

                // An empty line or out of lines for the row range
                if (line.Length == 0 || line.Index >= row.Index + row.Length)
                {
                    break;
                }

                // Look for a wrapped line
                if (line[0] == UTF8.Space)
                {
                    // If found, concatenate the value after the space onto the value so far
                    line = line.Substring(1);
                    currentPropertyValue = _currentRowBlock.Concatenate(currentPropertyValue, String8.Empty, line);
                }
                else
                {
                    // Set or Append the value just completed
                    // (on the first property line this is called with an empty name and value)
                    SetColumnValue(currentPropertyName, currentPropertyValue, currentIsBase64);

                    // Split the property name and value [value is after colon and optional space]
                    // NOTE(review): for a line without any colon, this relies on what BeforeFirst
                    // returns when the separator is missing - verify Substring(Length + 1) stays in range.
                    currentPropertyName  = line.BeforeFirst(UTF8.Colon);
                    currentPropertyValue = line.Substring(currentPropertyName.Length + 1);
                    if (currentPropertyValue.StartsWith(UTF8.Space))
                    {
                        currentPropertyValue = currentPropertyValue.Substring(1);
                    }

                    // Determine if the value is encoded
                    // (a second colon directly after the first marks the value as Base64)
                    // NOTE(review): for a line which ends right after the first colon, this index is
                    // past the end of 'line' - confirm the String8 indexer tolerates that.
                    currentIsBase64 = (line[currentPropertyName.Length + 1] == UTF8.Colon);
                    if (currentIsBase64)
                    {
                        // Remove the second colon from the value.
                        // NOTE(review): a space following the "::" is not removed here - confirm it is handled downstream.
                        currentPropertyValue = currentPropertyValue.Substring(1);
                    }
                }
            }

            // Set the last property value
            SetColumnValue(currentPropertyName, currentPropertyValue, currentIsBase64);

            // The next row starts after the row separator line
            _nextLineIndex++;

            this.RowCountRead++;
            return(true);
        }