/// <summary>
///  NextBlock is called by NextRow before reading the last row in _currentBlock.
///  Since the file is read in blocks, the last row is usually incomplete.
///
///  If there's more file content, NextBlock should copy the last row to the start
///  of the buffer, read more content, and reset _currentBlock to the new split rows
///  and _nextRowIndexInBlock to zero (telling NextRow to read that row next).
///
///  If there's no more file, the last row is complete. NextBlock must return
///  without resetting _nextRowIndexInBlock to tell NextRow it's safe
///  to return to the user.
///
///  NextRow will call NextBlock *again* after the last row. NextBlock must again
///  not reset anything to tell NextRow that there's nothing left.
///
///  So, NextBlock must:
///   - Copy the last row to the start of the buffer (if not already there)
///   - Read more content to fill the buffer
///   - Split the buffer into rows
///   - Stop at end-of-file or when a full row was read
///   - Double the buffer until one of these conditions is met
///
///   - Reset _nextRowIndexInBlock *only if* a row was shifted or read
/// </summary>
/// <remarks>
///  End-of-file is detected by the buffer not being full after reading. Because
///  Stream.Read may return fewer bytes than requested before the end of the
///  stream, the read is repeated until the buffer is full or Read returns zero;
///  otherwise a short read would be mistaken for end-of-file.
/// </remarks>
private void NextBlock()
{
    int bufferLengthFilledStart = 0;

    // Copy the last row to the start of the buffer (if not already there)
    if (_currentBlock.Count > 1)
    {
        String8 lastRow = _currentBlock[_currentBlock.Count - 1];
        lastRow.WriteTo(_buffer, 0);
        bufferLengthFilledStart = lastRow.Length;

        // Reset the next row to read (since we shifted a row)
        _nextRowIndexInBlock = 0;
    }

    int bufferLengthFilled = bufferLengthFilledStart;

    while (true)
    {
        // Read more content to fill the buffer. Keep reading until the buffer is
        // full or the stream reports end-of-file (Read returns zero), so that a
        // partial read is never confused with the end of the file.
        while (bufferLengthFilled < _buffer.Length)
        {
            int bytesRead = _stream.Read(_buffer, bufferLengthFilled, _buffer.Length - bufferLengthFilled);
            if (bytesRead == 0)
            {
                break;
            }

            bufferLengthFilled += bytesRead;
        }

        String8 block = new String8(_buffer, 0, bufferLengthFilled);

        // Strip leading UTF8 BOM, if found, on first block
        // (the stream position equals the buffered byte count only while
        // everything read so far is still in the buffer)
        if (_stream.Position == bufferLengthFilled)
        {
            if (block.Length >= 3 && block[0] == 0xEF && block[1] == 0xBB && block[2] == 0xBF)
            {
                block = block.Substring(3);
            }
        }

        // Split the buffer into rows
        _currentBlock = _splitRows(block, _rowPositionArray);

        // Stop at end-of-file (reading couldn't fill the buffer)
        if (bufferLengthFilled < _buffer.Length)
        {
            break;
        }

        // Stop when a full row was read (split found at least two parts)
        if (_currentBlock.Count > 1)
        {
            break;
        }

        // Otherwise, double the buffer (until a full row or end of file)
        byte[] newBuffer = new byte[_buffer.Length * 2];
        _buffer.CopyTo(newBuffer, 0);
        _buffer = newBuffer;
    }

    // If we read new content, reset the next row to read
    if (bufferLengthFilled > bufferLengthFilledStart)
    {
        _nextRowIndexInBlock = 0;
    }
}
/// <summary>
///  Split 'value' with AlphanumericSplitter.Split and join the resulting parts
///  with '|' between them, keeping only the non-empty parts whose first byte
///  passes AlphanumericSplitter.IsAlphaNumeric.
/// </summary>
/// <param name="value">String to split</param>
/// <returns>The kept parts, '|' delimited (empty if no part was kept)</returns>
public static string SplitAndJoin(string value)
{
    // Convert the input to a String8 in a buffer sized exactly for it
    byte[] convertBuffer = new byte[String8.GetLength(value)];
    String8 source = String8.Convert(value, convertBuffer);

    PartialArray<int> boundaries = new PartialArray<int>();
    String8Set parts = AlphanumericSplitter.Split(source, ref boundaries);

    StringBuilder joined = new StringBuilder();

    using (StringWriter output = new StringWriter(joined))
    {
        bool needsDelimiter = false;

        for (int partIndex = 0; partIndex < parts.Count; ++partIndex)
        {
            String8 current = parts[partIndex];

            // Skip empty parts and parts which don't start with an alphanumeric byte
            if (current.IsEmpty())
            {
                continue;
            }

            if (!AlphanumericSplitter.IsAlphaNumeric(current[0]))
            {
                continue;
            }

            // Delimit every kept part after the first one
            if (needsDelimiter)
            {
                output.Write("|");
            }

            needsDelimiter = true;
            current.WriteTo(output);
        }
    }

    return joined.ToString();
}
/// <summary>
///  Write one value to the console output stream, preceded by a tab
///  unless it is the first value written since the column count was last zero.
/// </summary>
/// <param name="value">Value to write</param>
public void Write(String8 value)
{
    // Count this column, remembering whether any came before it
    bool hasPreviousColumn = (_currentRowColumnCount > 0);
    _currentRowColumnCount++;

    if (hasPreviousColumn)
    {
        _consoleOutStream.Write('\t');
    }

    value.WriteTo(_consoleOutStream);
}
/// <summary>
///  Write the column name section of the output: each column name, quoted and
///  escaped, mapped to its zero-based index. Records the number of columns
///  written in _columnCount.
/// </summary>
/// <param name="columnNames">Names of the columns, in column order</param>
/// <exception cref="InvalidOperationException">Columns were already set (_columnCount is non-zero)</exception>
/// <exception cref="ArgumentNullException">columnNames is null</exception>
public void SetColumns(IEnumerable<string> columnNames)
{
    // Enough bytes for the decimal form of any 64-bit integer, including a sign.
    // NOTE(review): String8.FromInteger's exact buffer requirement isn't visible here - confirm.
    const int MinimumConversionBufferLength = 20;

    if (_columnCount != 0)
    {
        throw new InvalidOperationException("SetColumns may only be called once on a JsonTabularWriter.");
    }

    // Fail before anything is written, rather than part way through the header
    if (columnNames == null)
    {
        throw new ArgumentNullException("columnNames");
    }

    //  {
    //     "colIndex": {
    s_beforeColumnNames.WriteTo(_stream);

    int columnIndex = 0;
    foreach (string columnName in columnNames)
    {
        // The same buffer converts the column name *and* the column index below,
        // so it must be large enough for both. Sizing it only for the name would
        // leave too little room for the index digits when names are very short.
        int requiredLength = String8.GetLength(columnName);
        if (requiredLength < MinimumConversionBufferLength)
        {
            requiredLength = MinimumConversionBufferLength;
        }

        if (_typeConversionBuffer == null || _typeConversionBuffer.Length < requiredLength)
        {
            _typeConversionBuffer = new byte[requiredLength];
        }

        // ,
        if (columnIndex > 0)
        {
            s_valueDelimiter.WriteTo(_stream);
        }

        // "ColumnName"
        _stream.WriteByte(UTF8.Quote);
        WriteEscaped(String8.Convert(columnName, _typeConversionBuffer));
        _stream.WriteByte(UTF8.Quote);

        // : 0
        _stream.WriteByte(UTF8.Colon);
        _stream.WriteByte(UTF8.Space);
        String8.FromInteger(columnIndex, _typeConversionBuffer).WriteTo(_stream);

        columnIndex++;
    }

    //  },
    //  "rows": {
    s_afterColumnNames.WriteTo(_stream);

    _columnCount = columnIndex;
}
/// <summary>
///  Write one byte to the stream, escaped as needed inside a quoted string:
///  bytes below 32 are written as the escaped character prefix followed by
///  two hex digits, quotes and backslashes are preceded by a backslash, and
///  every other byte is written unchanged.
/// </summary>
/// <param name="c">UTF8 byte to write</param>
private void WriteEscaped(byte c)
{
    // Control range: prefix, then the high and low nibble as hex digits
    if (c < 32)
    {
        int highNibble = c / 16;
        int lowNibble = c & 0xF;

        s_escapedCharPrefix.WriteTo(_stream);
        _stream.WriteByte(ToHexDigit(highNibble));
        _stream.WriteByte(ToHexDigit(lowNibble));
        return;
    }

    // Quote and backslash need a leading backslash; the byte itself always follows
    bool needsBackslash = (c == UTF8.Quote || c == UTF8.Backslash);
    if (needsBackslash)
    {
        _stream.WriteByte(UTF8.Backslash);
    }

    _stream.WriteByte(c);
}
/// <summary>
///  Assert that hashing 'value' and hashing a byte-for-byte copy of it
///  (held in a separate array) produce the same hash.
/// </summary>
/// <param name="value">Value to hash and compare against its copy</param>
private unsafe void CompareHashAndCopy(String8 value)
{
    ulong expectedHash = Hashing.Hash(value, 0);

    // Duplicate the value's bytes into a new, exactly sized array
    int byteCount = value.Length;
    byte[] duplicateBytes = new byte[byteCount];
    value.WriteTo(duplicateBytes, 0);

    String8 duplicate = new String8(duplicateBytes, 0, byteCount);
    ulong actualHash = Hashing.Hash(duplicate, 0);

    string failureMessage = String.Format("Hash of '{0}' didn't match a copy of itself.", value);
    Assert.AreEqual(expectedHash, actualHash, failureMessage);
}
/// <summary>
///  Append the String8 values in 'xarray': each value's bytes go to
///  _bytesWriter, and the running total of bytes written after each value
///  goes to _positionsWriter.
/// </summary>
/// <param name="xarray">Values to append; its Array must be a String8[]</param>
public void Append(XArray xarray)
{
    var valueCount = xarray.Count;

    // Make room for one position per value
    Allocator.AllocateToSize(ref _positionsBuffer, valueCount);

    String8[] values = (String8[])xarray.Array;

    for (int row = 0; row < valueCount; ++row)
    {
        String8 current = values[xarray.Index(row)];
        current.WriteTo(_bytesWriter);

        // Record the cumulative position just past this value
        _position += current.Length;
        _positionsBuffer[row] = _position;
    }

    _positionsWriter.Append(XArray.All(_positionsBuffer, valueCount));
}
/// <summary>
///  Compute a cryptographic hash of 'value' and 'hashKeyHash' together.
///  Used to map values in the Sanitizer.
/// </summary>
/// <remarks>
///  The hashed input is the four bytes of 'hashKeyHash' (least significant byte
///  first) followed by the bytes of 'value'. The result is the first four bytes
///  of the hash, with the first byte most significant.
///
///  NOTE(review): the hasher and buffer are shared static state; this looks
///  unsafe to call from multiple threads at once unless those fields are
///  thread-static - confirm against their declarations.
/// </remarks>
/// <param name="value">Source value to hash</param>
/// <param name="hashKeyHash">HashKey for this hash</param>
/// <returns>uint of hash result</returns>
public static uint Hash(String8 value, uint hashKeyHash)
{
    if (s_hasher == null)
    {
        s_hasher = SHA256Managed.Create();
    }

    // Grow the shared buffer if it can't hold the key prefix plus the value
    int inputLength = value.Length + 4;
    if (s_buffer == null || s_buffer.Length < inputLength)
    {
        s_buffer = new byte[inputLength];
    }

    // Key prefix, least significant byte first
    s_buffer[0] = (byte)(hashKeyHash & 0xFF);
    s_buffer[1] = (byte)((hashKeyHash >> 8) & 0xFF);
    s_buffer[2] = (byte)((hashKeyHash >> 16) & 0xFF);
    s_buffer[3] = (byte)((hashKeyHash >> 24) & 0xFF);

    value.WriteTo(s_buffer, 4);

    // Hash only the filled part; the buffer may be longer from an earlier call
    byte[] hash = s_hasher.ComputeHash(s_buffer, 0, inputLength);

    // Assemble in unsigned arithmetic. Shifting the bytes as signed ints and
    // casting the sum to uint yields a negative intermediate when hash[0] >= 0x80,
    // which throws OverflowException if compiled with checked arithmetic.
    uint result = ((uint)hash[0] << 24) | ((uint)hash[1] << 16) | ((uint)hash[2] << 8) | hash[3];
    return result;
}
/// <summary>
///  Write 'part' directly to the console output stream, with no delimiter
///  before or after it.
/// </summary>
/// <param name="part">Portion of a value to write</param>
public void WriteValuePart(String8 part)
{
    var output = _consoleOutStream;
    part.WriteTo(output);
}