/// <summary>
///  Return the part of 'text' which follows the first 'prefix' and precedes
///  the next 'suffix' found after it.
/// </summary>
/// <param name="text">Text to search</param>
/// <param name="prefix">Marker the result must follow</param>
/// <param name="suffix">Marker the result must precede</param>
/// <returns>
///  The whole text if the prefix isn't found, empty if the prefix ends the text,
///  everything after the prefix if no suffix follows, otherwise the text in between.
/// </returns>
private String8 Between(String8 text, String8 prefix, String8 suffix)
{
    int prefixStart = text.IndexOf(prefix);
    if (prefixStart == -1)
    {
        // No prefix: nothing to trim
        return text;
    }

    int contentStart = prefixStart + prefix.Length;
    if (contentStart >= text.Length)
    {
        // Prefix is the last thing in the text
        return String8.Empty;
    }

    // Only look for the suffix after the end of the prefix
    int contentEnd = text.IndexOf(suffix, contentStart);

    return (contentEnd == -1)
        ? text.Substring(contentStart)
        : text.Substring(contentStart, contentEnd - contentStart);
}
/// <summary>
///  Write 'value' to the writer as a single value, leaving out every span which
///  starts with a less-than sign and runs through the next greater-than sign.
///  If a less-than sign has no closing greater-than sign, nothing from it onward is written.
/// </summary>
/// <param name="value">UTF8 text to write</param>
/// <param name="writer">Writer which receives the value parts</param>
public static void WriteHtmlEscaped(String8 value, ITabularWriter writer)
{
    writer.WriteValueStart();

    int resumeAt = 0;
    int tagStart;

    while ((tagStart = value.IndexOf((byte)'<', resumeAt)) != -1)
    {
        // Emit the text sitting in front of this tag
        writer.WriteValuePart(value.Substring(resumeAt, tagStart - resumeAt));

        int tagEnd = value.IndexOf((byte)'>', tagStart + 1);
        if (tagEnd == -1)
        {
            // Unclosed tag: drop the remainder of the value
            resumeAt = value.Length;
            break;
        }

        // Continue just past the closing bracket
        resumeAt = tagEnd + 1;
    }

    // Emit whatever follows the final tag (empty after an unclosed tag)
    writer.WriteValuePart(value.Substring(resumeAt));
    writer.WriteValueEnd();
}
// Exercises String8.ShiftBack: each part of a semicolon-delimited value is shifted
// back over the delimiters (and leading spaces) seen so far, and must still compare
// equal to a copy of itself taken before the shift.
public void String8_ShiftBack()
{
    String8Block copies = new String8Block();
    String8 source = "One; Two;Three; Four".TestConvert();
    String8Set pieces = source.Split(UTF8.Semicolon, new PartialArray<int>(5, false));

    int shiftSoFar = 0;
    for (int index = 0; index < pieces.Count; ++index)
    {
        String8 piece = pieces[index];

        // One position is counted for every part, plus one more for a leading space
        shiftSoFar += 1;
        if (piece.StartsWith(UTF8.Space))
        {
            piece = piece.Substring(1);
            shiftSoFar += 1;
        }

        // Copy first: the shift moves the bytes within the shared buffer
        String8 expected = copies.GetCopy(piece);
        String8 actual = piece.ShiftBack(shiftSoFar);
        Assert.AreEqual(expected, actual);
    }

    // NOTE(review): this asserts the collapsed text is *not* "OneTwoThreeFour" - confirm that is intended
    String8 collapsed = source.Substring(0, source.Length - shiftSoFar);
    Assert.AreNotEqual("OneTwoThreeFour", collapsed.ToString());
}
/// <summary>
///  NextBlock is called by NextRow before it reads the last row in _currentBlock.
///  The file is read in blocks, so that last row is usually incomplete.
///
///  When more file content exists, NextBlock moves the last row to the front of
///  the buffer, reads more content behind it, re-splits the buffer into rows and
///  sets _nextRowIndexInBlock to zero so NextRow reads that row next.
///
///  When the file is exhausted, the last row is complete. NextBlock then leaves
///  _nextRowIndexInBlock alone, which tells NextRow the row is safe to return.
///  NextRow calls NextBlock once more after that row; leaving the index alone
///  again tells NextRow that nothing is left.
///
///  Steps:
///   - Copy the last row to the start of the buffer (when the block has more than one row)
///   - Read more content to fill the buffer
///   - Split the buffer into rows
///   - Stop when the read didn't fill the buffer or when more than one row was found
///   - Otherwise double the buffer and repeat
///   - Reset _nextRowIndexInBlock only if a row was shifted or content was read
/// </summary>
private void NextBlock()
{
    int carriedLength = 0;

    if (_currentBlock.Count > 1)
    {
        // Move the (possibly partial) last row to the front of the buffer
        String8 partialRow = _currentBlock[_currentBlock.Count - 1];
        partialRow.WriteTo(_buffer, 0);
        carriedLength = partialRow.Length;

        // A row was shifted, so reading restarts at the first row
        _nextRowIndexInBlock = 0;
    }

    int filled = carriedLength;
    bool needLargerBuffer;

    do
    {
        // Append new content after what is already in the buffer
        filled += _stream.Read(_buffer, filled, _buffer.Length - filled);
        String8 content = new String8(_buffer, 0, filled);

        // Only the first block can start with a UTF8 byte order mark; strip it if present
        bool isFirstBlock = (_stream.Position == filled);
        if (isFirstBlock && content.Length >= 3 && content[0] == 0xEF && content[1] == 0xBB && content[2] == 0xBF)
        {
            content = content.Substring(3);
        }

        _currentBlock = _splitRows(content, _rowPositionArray);

        // Keep going only while the buffer is full and still holds at most one row
        needLargerBuffer = (filled >= _buffer.Length) && (_currentBlock.Count <= 1);
        if (needLargerBuffer)
        {
            byte[] larger = new byte[_buffer.Length * 2];
            _buffer.CopyTo(larger, 0);
            _buffer = larger;
        }
    }
    while (needLargerBuffer);

    // New content was read, so reading restarts at the first row
    if (filled > carriedLength)
    {
        _nextRowIndexInBlock = 0;
    }
}
/// <summary>
///  Split a single row into cells via SplitAndDecodeCsvCells, after dropping
///  one trailing '\r' so that '\r\n' and '\n' terminated rows split the same way.
/// </summary>
/// <param name="row">Row content to split</param>
/// <param name="cellPositionArray">Array handed to the splitter for cell positions</param>
/// <returns>Set of cells found in the row</returns>
protected override String8Set SplitCells(String8 row, PartialArray<int> cellPositionArray)
{
    String8 content = row.EndsWith(UTF8.CR)
        ? row.Substring(0, row.Length - 1)
        : row;

    return content.SplitAndDecodeCsvCells(cellPositionArray);
}
/// <summary>
///  Return the part of 'text' in front of the first occurrence of 'value',
///  or all of 'text' when 'value' does not occur.
/// </summary>
/// <param name="text">Text to search</param>
/// <param name="value">Value to stop before</param>
/// <returns>Text before the first match, or the full text if there is no match</returns>
private String8 BeforeFirst(String8 text, String8 value)
{
    int matchStart = text.IndexOf(value);
    return (matchStart == -1) ? text : text.Substring(0, matchStart);
}
/// <summary>
///  Cap 'text' at 'limit' bytes.
/// </summary>
/// <param name="text">Text to shorten</param>
/// <param name="limit">Maximum length to keep</param>
/// <returns>'text' itself when shorter than 'limit', otherwise its first 'limit' bytes</returns>
private String8 Truncate(String8 text, int limit)
{
    if (text.Length >= limit)
    {
        return text.Substring(0, limit);
    }

    return text;
}
/// <summary>
///  Write a cell value to the stream, leaving out every tab and newline byte.
///  There is no standard TSV escaping, so the delimiters are simply omitted.
/// </summary>
/// <param name="stream">Stream to write to</param>
/// <param name="value">UTF8 value to write</param>
protected override void WriteCellValue(Stream stream, String8 value)
{
    // Offset (within value) of the first byte not yet written
    int pendingStart = 0;

    for (int offset = 0; offset < value.Length; ++offset)
    {
        byte current = value.Array[value.Index + offset];
        if (current != UTF8.Tab && current != UTF8.Newline)
        {
            continue;
        }

        // Flush the run before the delimiter, then skip over the delimiter itself
        value.Substring(pendingStart, offset - pendingStart).WriteTo(stream);
        pendingStart = offset + 1;
    }

    // Flush whatever follows the last delimiter
    value.Substring(pendingStart).WriteTo(stream);
}
/// <summary>
///  Write UTF8 content escaped so that it can sit inside double-quotes.
///  The surrounding quotes themselves are not written.
///  Bytes below 32 are written as s_escapedCharPrefix followed by two hex digits;
///  backslash and quote are written with a backslash in front.
/// </summary>
/// <param name="value">UTF8 value to write</param>
private void WriteEscaped(String8 value)
{
    // Offset (within value) of the first byte not yet written
    int pendingStart = 0;

    for (int offset = 0; offset < value.Length; ++offset)
    {
        byte current = value.Array[value.Index + offset];
        bool isControl = current < 32;

        if (!isControl && current != UTF8.Backslash && current != UTF8.Quote)
        {
            // Plain byte: it goes out with the next unescaped run
            continue;
        }

        // Flush the unescaped run in front of this byte
        value.Substring(pendingStart, offset - pendingStart).WriteTo(_stream);

        if (isControl)
        {
            // Prefix, then the high and low nibble as hex digits
            s_escapedCharPrefix.WriteTo(_stream);
            _stream.WriteByte(ToHexDigit(current / 16));
            _stream.WriteByte(ToHexDigit(current & 0xF));
        }
        else
        {
            _stream.WriteByte(UTF8.Backslash);
            _stream.WriteByte(current);
        }

        // The next unescaped run begins after this byte
        pendingStart = offset + 1;
    }

    // Flush the final unescaped run
    value.Substring(pendingStart).WriteTo(_stream);
}
// Covers String8.StartsWith / EndsWith for single bytes and for String8 values,
// both case sensitive and (with the 'true' argument) case insensitive.
public void String8_StartsWithEndsWith()
{
    string word = "Collections";
    string wordOtherCasing = "coLLecTionS";
    String8 word8 = word.TestConvert();
    String8 wordOtherCasing8 = wordOtherCasing.TestConvert();

    // Single byte against an empty value
    Assert.IsFalse(String8.Empty.StartsWith(UTF8.Space));
    Assert.IsFalse(String8.Empty.EndsWith(UTF8.Space));

    // Single byte at the start
    Assert.IsTrue(word8.StartsWith((byte)'C'));
    Assert.IsFalse(word8.StartsWith((byte)'c'));
    Assert.IsFalse(word8.StartsWith(UTF8.Newline));

    // Single byte at the end
    Assert.IsTrue(word8.EndsWith((byte)'s'));
    Assert.IsFalse(word8.EndsWith((byte)'S'));
    Assert.IsFalse(word8.EndsWith(UTF8.Newline));

    // String8 against an empty value
    Assert.IsFalse(String8.Empty.StartsWith(word8));
    Assert.IsFalse(String8.Empty.EndsWith(word8));
    Assert.IsFalse(String8.Empty.StartsWith(word8, true));
    Assert.IsFalse(String8.Empty.EndsWith(word8, true));

    // String8 suffixes
    Assert.IsTrue(word8.EndsWith(word8));
    Assert.IsTrue(word8.EndsWith(word8.Substring(1)));
    Assert.IsTrue(word8.EndsWith(word8.Substring(8)));
    Assert.IsFalse(word8.EndsWith(wordOtherCasing8));
    Assert.IsTrue(word8.EndsWith(wordOtherCasing8, true));

    // String8 prefixes
    Assert.IsTrue(word8.StartsWith(word8));
    Assert.IsTrue(word8.StartsWith(word8.Substring(0, word8.Length - 1)));
    Assert.IsTrue(word8.StartsWith(word8.Substring(0, 3)));
    Assert.IsFalse(word8.StartsWith(wordOtherCasing8));
    Assert.IsTrue(word8.StartsWith(wordOtherCasing8, true));
}
/// <summary>
///  Write part of a value to the stream with every quote byte doubled.
/// </summary>
/// <param name="stream">Stream to write to</param>
/// <param name="value">UTF8 value part to write</param>
protected override void WriteValuePart(Stream stream, String8 value)
{
    // Offset (within value) of the first byte not yet written
    int pendingStart = 0;

    for (int offset = 0; offset < value._length; ++offset)
    {
        if (value._buffer[value._index + offset] != UTF8.Quote)
        {
            continue;
        }

        // Flush everything in front of the quote, then write the extra quote
        value.Substring(pendingStart, offset - pendingStart).WriteTo(stream);
        stream.WriteByte(UTF8.Quote);

        // The original quote stays pending and goes out with the next run
        pendingStart = offset;
    }

    // Flush from the last quote seen (or from the start) to the end
    value.Substring(pendingStart).WriteTo(stream);
}
/// <summary>
///  Advance the reader to the next row. Call this before reading the first row.
/// </summary>
/// <returns>True if another row was read, False if there is no more content</returns>
public bool NextRow()
{
    // The last row in the block may be only partially read, so ask for more before using it
    bool onLastBufferedRow = _nextRowIndexInBlock >= _currentBlock.Count - 1;
    if (onLastBufferedRow)
    {
        NextBlock();
    }

    // Still nothing to read: out of rows
    if (_nextRowIndexInBlock >= _currentBlock.Count)
    {
        return false;
    }

    String8 line = _currentBlock[_nextRowIndexInBlock];

    // The very first row may begin with a UTF8 byte order mark; strip it
    bool startsWithBom = _rowCountRead == 0
        && line.Length >= 3
        && line[0] == 0xEF
        && line[1] == 0xBB
        && line[2] == 0xBF;

    if (startsWithBom)
    {
        line = line.Substring(3);
    }

    _currentRow = SplitCells(line, _cellPositionArray);
    _rowCountRead++;
    _nextRowIndexInBlock++;

    // Keep one reusable String8TabularValue per cell to avoid per-cell allocation or boxing;
    // the set is rebuilt only when a row has more cells than it can hold
    int cellCount = _currentRow.Count;
    if (_valueBoxes == null || _valueBoxes.Length < cellCount)
    {
        _valueBoxes = new String8TabularValue[cellCount];
        for (int box = 0; box < _valueBoxes.Length; ++box)
        {
            _valueBoxes[box] = new String8TabularValue();
        }
    }

    return true;
}
/// <summary>
///  Return the part of 'text' which follows the first occurrence of 'value'.
/// </summary>
/// <param name="text">Text to search</param>
/// <param name="value">Value to skip past</param>
/// <returns>
///  The full text if 'value' isn't found, empty if 'value' ends the text,
///  otherwise everything after the first match.
/// </returns>
private String8 AfterFirst(String8 text, String8 value)
{
    int matchStart = text.IndexOf(value);
    if (matchStart == -1)
    {
        return text;
    }

    int remainderStart = matchStart + value.Length;

    return (remainderStart >= text.Length)
        ? String8.Empty
        : text.Substring(remainderStart);
}
/// <summary>
///  Describe a match as "(index, length: 'matched text')" using the
///  slice of 'content' the match covers.
/// </summary>
/// <param name="match">Match providing the Index and Length of the slice</param>
/// <param name="content">Content the match was found in</param>
/// <returns>Description of the match</returns>
private string MatchToString(Match2 match, String8 content)
{
    String8 matched = content.Substring(match.Index, match.Length);
    return $"({match.Index}, {match.Length}: '{matched}')";
}
/// <summary>
///  Advance to the next row: take the next row range from the inner reader, then
///  walk its lines, building "name: value" pairs (joining wrapped lines) and
///  passing each completed pair to SetColumnValue.
/// </summary>
/// <returns>True if a row was read, False if the inner reader returned an empty row</returns>
public bool NextRow()
{
    _currentRowBlock.Clear();

    String8 row = _reader.NextRow();
    if (row.IsEmpty())
    {
        return false;
    }

    // Start every column empty for the new row
    for (int column = 0; column < _currentRowValues.Length; ++column)
    {
        _currentRowValues[column].SetValue(String8.Empty);
    }

    String8 name = String8.Empty;
    String8 valueSoFar = String8.Empty;
    bool isBase64 = false;

    // Lines starting at or past this buffer position lie outside the current row
    int rowEnd = row.Index + row.Length;

    for (; _nextLineIndex < _blockLines.Count; ++_nextLineIndex)
    {
        String8 line = _blockLines[_nextLineIndex];

        // Comment lines ('#') and grouping lines ('-') carry no values
        if (line.StartsWith(UTF8.Pound) || line.StartsWith(UTF8.Dash))
        {
            continue;
        }

        // Drop a trailing CR, if found
        if (line.EndsWith(UTF8.CR))
        {
            line = line.Substring(0, line.Length - 1);
        }

        // An empty line, or a line outside the row range, ends the row
        if (line.Length == 0 || line.Index >= rowEnd)
        {
            break;
        }

        if (line[0] == UTF8.Space)
        {
            // Wrapped line: append the text after the space to the value so far
            valueSoFar = _currentRowBlock.Concatenate(valueSoFar, String8.Empty, line.Substring(1));
            continue;
        }

        // A new property begins, so the previous one is complete
        SetColumnValue(name, valueSoFar, isBase64);

        // The name is everything before the first colon; the value follows the colon and an optional space
        name = line.BeforeFirst(UTF8.Colon);
        int afterColon = name.Length + 1;

        valueSoFar = line.Substring(afterColon);
        if (valueSoFar.StartsWith(UTF8.Space))
        {
            valueSoFar = valueSoFar.Substring(1);
        }

        // A second colon directly after the first marks the value as base64.
        // NOTE(review): assumes a byte exists at 'afterColon' - confirm for lines with no colon or an empty value
        isBase64 = (line[afterColon] == UTF8.Colon);
        if (isBase64)
        {
            valueSoFar = valueSoFar.Substring(1);
        }
    }

    // Record the property still in progress when the row ended
    SetColumnValue(name, valueSoFar, isBase64);

    // Step over the row separator line so the next row starts after it
    _nextLineIndex++;
    this.RowCountRead++;

    return true;
}