/// <summary>
/// Reads the next character from the stream. This is the slow path: when the decoded
/// character buffer is exhausted it re-positions the stream, reads another block of
/// bytes and decodes it before handing out a character.
/// </summary>
/// <param name="ch">Receives the next character, or '\0' when no more bytes could be read.</param>
/// <returns>True if a character was produced; false when the stream read returned zero bytes.</returns>
private bool TryReadNextCharInternal(out char ch)
{
    while (true)
    {
        // Hand out already-decoded characters first.
        if (this.charsConsumed < this.charsProduced)
        {
            ch = this.charBuffer[this.charsConsumed];
            this.charsConsumed++;
            return true;
        }

        // Re-synchronise the stream with the bytes that were actually decoded; the previous
        // read may have pulled in more bytes than the decoder consumed.
        long expectedPosition = this.startPosition + this.bytesConverted;
        if (this.stream.Position != expectedPosition)
        {
            this.stream.Position = expectedPosition;
        }

        this.startPosition = this.stream.Position;
        this.bytesRead = this.stream.Read(this.byteBuffer, 0, this.byteBuffer.Length);
        this.charsConsumed = 0;
        this.charsProduced = 0;

        // Zero bytes read means there is nothing left to decode.
        if (this.bytesRead == 0)
        {
            ch = '\0';
            return false;
        }

        // On the first block after a reposition, skip forward to the next code start so
        // decoding does not begin in the middle of an encoded character.
        int conversionStart = 0;
        if (this.adjustFirstBlock)
        {
            conversionStart = EncodingUtil.GetNextCodeStart(this.byteBuffer, 0, this.bytesRead, this.encoding);
            this.startPosition += conversionStart;
            this.adjustFirstBlock = false;
        }

        // Only the bytes from conversionStart up to bytesRead are valid input. Passing
        // bytesRead as the count (as the code previously did) ran past the data that was
        // read whenever conversionStart was non-zero.
        int byteCount = this.bytesRead - conversionStart;

        // NOTE(review): flush is true, so the decoder keeps no partial sequence for the
        // next block — confirm this is intended for multi-byte characters that straddle
        // the end of the byte buffer.
        bool completed;
        this.decoder.Convert(
            this.byteBuffer, conversionStart, byteCount,
            this.charBuffer, 0, this.charBuffer.Length,
            true,
            out this.bytesConverted, out this.charsProduced, out completed);

        // NOTE(review): startPosition was already advanced by conversionStart above, so
        // startPosition + bytesConverted counts the skipped bytes twice on the next
        // re-synchronisation — confirm against the other users of these fields.
        this.bytesConverted += conversionStart;
    }
}
/// <summary>
/// Divides the stream into separate partitions.
/// </summary>
/// <param name="stream">A stream over the entire file.</param>
/// <param name="partitions">The number of similar size partitions requested. Must be at least 1.</param>
/// <param name="encoding">An optional encoding for the file. If unspecified it will be inferred from the stream itself.</param>
/// <returns>Returns an array of starting positions, one for each partition.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="partitions"/> is less than 1.</exception>
/// <remarks>The stream position is reset to 0 before returning.</remarks>
public static long[] GetPartitionStarts(Stream stream, int partitions, Encoding encoding = null)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // A zero count would divide by zero below and a negative count would fail in the
    // array allocation, so reject both up front with a descriptive error.
    if (partitions < 1)
    {
        throw new ArgumentOutOfRangeException("partitions", partitions, "At least one partition is required.");
    }

    encoding = encoding ?? GetEncoding(stream);

    // Nominal size of each partition, before any adjustment.
    long size = stream.Length / partitions;

    // Initial starting points: evenly spaced offsets, each pushed forward by the
    // alignment that EncodingUtil reports for that offset and encoding.
    long[] starts = new long[partitions];
    long start = 0;
    for (int i = 0; i < partitions; i++, start += size)
    {
        starts[i] = start + EncodingUtil.GetAlignment(start, encoding);
    }

    // Adjust every start except the first by moving it to the next line start.
    for (int i = 1; i < partitions; i++)
    {
        // Move ahead if the prior partition already overlaps this initial starting point.
        if (starts[i - 1] > starts[i])
        {
            starts[i] = starts[i - 1];
        }

        stream.Position = starts[i];
        long nextLineStart = FindNextLineStart(stream, encoding);

        // A non-positive result is treated as "no further line start": the partition
        // then begins at the end of the stream.
        starts[i] = nextLineStart > 0 ? nextLineStart : stream.Length;
    }

    stream.Position = 0;
    return starts;
}