/// <summary>
/// Walks the first <paramref name="bufferLen"/> characters of the buffer span, advancing the
/// state machine once per character and applying each resulting action to <c>Partial</c>.
/// Consecutive skip results, and consecutive append results, are collected into a single
/// run and applied with one call.
/// </summary>
/// <param name="bufferLen">Number of characters, starting at index 0 of the buffer span, to examine.</param>
/// <param name="unprocessedCharacters">
/// Set to the number of characters at the end of the examined range that were not consumed:
/// the characters after the one that finished a record or comment; the characters from the
/// start of a possible multi-character separator when too few remain to compare against it;
/// or 0 when the whole range was consumed.
/// </param>
/// <returns>
/// <c>HasValue</c> when a record was finished, <c>HasComment</c> when a comment was finished,
/// and <c>NoValue</c> otherwise (including when more characters are needed for a look-ahead).
/// </returns>
private ReadWithCommentResultType ProcessBuffer(int bufferLen, out int unprocessedCharacters)
{
    var chars = Buffer.Buffer.Span;

    // kind of run (skip or append) currently being accumulated; null when there is none
    ReaderStateMachine.AdvanceResult? inBatchableResult = null;
    // index of the first character in the current run; -1 when there is none
    var runStart = -1;

    for (var i = 0; i < bufferLen; i++)
    {
        var current = chars[i];
        var outcome = StateMachine.Advance(current, false);

        // number of additional characters consumed by a matched multi-character separator
        var extraSeparatorChars = 0;

        // only reported when the value separator is longer than one character
        if (outcome == ReaderStateMachine.AdvanceResult.LookAhead_MultiCharacterSeparator)
        {
            var separatorLength = Configuration.ValueSeparatorMemory.Length;
            var remaining = bufferLen - i;

            if (remaining < separatorLength)
            {
                // not enough characters to compare against the separator:
                // apply whatever run is in flight, then hand back everything from i onwards
                if (inBatchableResult.HasValue)
                {
                    switch (inBatchableResult.Value)
                    {
                        case ReaderStateMachine.AdvanceResult.Skip_Character:
                            // skipping takes no length, one call covers the whole run
                            Partial.SkipCharacter();
                            break;

                        case ReaderStateMachine.AdvanceResult.Append_Character:
                            var lengthBeforeLookAhead = i - runStart;
                            Partial.AppendCharacters(chars, runStart, lengthBeforeLookAhead);
                            break;

                        default:
                            unprocessedCharacters = default;
                            Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}", Configuration.Options);
                            return default;
                    }
                }

                // keep the current character and everything after it
                unprocessedCharacters = remaining;
                return ReadWithCommentResultType.NoValue;
            }

            var candidate = chars.Slice(i, separatorLength);
            if (Utils.AreEqual(candidate, Configuration.ValueSeparatorMemory.Span))
            {
                // the full separator is present: advance as a value separator
                // and step over the rest of its characters
                outcome = StateMachine.AdvanceValueSeparator();
                extraSeparatorChars = separatorLength - 1;
            }
            else
            {
                // not the separator after all: advance again with the second argument set to true
                outcome = StateMachine.Advance(current, true);
            }
        }

        // a run in flight ends just before this index
        var runEnd = i;

        // move past the remainder of a matched multi-character separator (0 otherwise)
        i += extraSeparatorChars;

        if (inBatchableResult.HasValue)
        {
            if (outcome == inBatchableResult.Value)
            {
                // same kind of result as the run in flight, so just extend it
                continue;
            }

            // the run is over: apply it, then handle the current result below
            switch (inBatchableResult.Value)
            {
                case ReaderStateMachine.AdvanceResult.Skip_Character:
                    // skipping takes no length, one call covers the whole run
                    Partial.SkipCharacter();
                    break;

                case ReaderStateMachine.AdvanceResult.Append_Character:
                    var finishedRunLength = runEnd - runStart;
                    Partial.AppendCharacters(chars, runStart, finishedRunLength);
                    break;

                default:
                    unprocessedCharacters = default;
                    Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}", Configuration.Options);
                    return default;
            }

            inBatchableResult = null;
            runStart = -1;
        }

        // no run is in flight at this point: the block above either continued or cleared it,
        // so the switch below never has to deal with an existing run
        switch (outcome)
        {
            case ReaderStateMachine.AdvanceResult.Skip_Character:
            case ReaderStateMachine.AdvanceResult.Append_Character:
                // start a new run of this kind at the current index
                inBatchableResult = outcome;
                runStart = i;
                continue;

            case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndCurrentCharacter:
                Partial.AppendCarriageReturn(chars);
                Partial.AppendCharacters(chars, i, 1);
                break;

            case ReaderStateMachine.AdvanceResult.Append_ValueSeparator:
            case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator:
                if (outcome == ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator)
                {
                    Partial.AppendCarriageReturn(chars);
                }

                Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                break;

            // Append_CarriageReturnAndEndComment is not handled here,
            // because it only happens when the data ENDs

            case ReaderStateMachine.AdvanceResult.Finished_Unescaped_Value:
            case ReaderStateMachine.AdvanceResult.Finished_Escaped_Value:
                PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_Escaped_Value);
                break;

            case ReaderStateMachine.AdvanceResult.Finished_LastValueUnescaped_Record:
            case ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record:
                if (Partial.PendingCharsCount > 0)
                {
                    PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record);
                }

                // everything after the current character is left for the next call
                unprocessedCharacters = bufferLen - i - 1;
                return ReadWithCommentResultType.HasValue;

            case ReaderStateMachine.AdvanceResult.Finished_Comment:
                unprocessedCharacters = bufferLen - i - 1;
                return ReadWithCommentResultType.HasComment;

            default:
                HandleUncommonAdvanceResults(outcome, current);
                break;
        }
    }

    // the examined range is exhausted; apply any run that was still in flight
    if (inBatchableResult.HasValue)
    {
        switch (inBatchableResult.Value)
        {
            case ReaderStateMachine.AdvanceResult.Skip_Character:
                // skipping takes no length, one call covers the whole run
                Partial.SkipCharacter();
                break;

            case ReaderStateMachine.AdvanceResult.Append_Character:
                // the run reaches the end of the range, so it includes the last character
                var trailingRunLength = bufferLen - runStart;
                Partial.AppendCharacters(chars, runStart, trailingRunLength);
                break;

            default:
                unprocessedCharacters = default;
                Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}", Configuration.Options);
                return default;
        }
    }

    unprocessedCharacters = 0;
    return ReadWithCommentResultType.NoValue;
}
/// <summary>
/// Walks the first <paramref name="bufferLen"/> characters of the buffer span, advancing the
/// state machine once per character and applying each resulting action to <c>Partial</c>.
/// Consecutive skip results, and consecutive append results, are collected into a single
/// run and applied with one call.
/// </summary>
/// <param name="bufferLen">Number of characters, starting at index 0 of the buffer span, to examine.</param>
/// <param name="unprocessedCharacters">
/// Set to the number of characters after the one that finished a record,
/// or 0 when the whole range was consumed without finishing a record.
/// </param>
/// <returns><c>true</c> when a record was finished; otherwise <c>false</c>.</returns>
private bool ProcessBuffer(int bufferLen, out int unprocessedCharacters)
{
    var buffSpan = Buffer.Buffer.Span;

    // kind of run (skip or append) currently being accumulated; null when there is none
    ReaderStateMachine.AdvanceResult? inBatchableResult = null;
    // index of the first character in the current run; -1 when there is none
    var consistentResultSince = -1;

    for (var i = 0; i < bufferLen; i++)
    {
        var c = buffSpan[i];
        var res = StateMachine.Advance(c);

        // try and batch skips and appends
        // to save time on copying AND on
        // basically pointless method calls
        if (inBatchableResult != null)
        {
            if (res == inBatchableResult)
            {
                // same kind of result as the run in flight, so just extend it
                continue;
            }

            // the run is over: apply it, then handle the current character below
            switch (inBatchableResult.Value)
            {
                case ReaderStateMachine.AdvanceResult.Skip_Character:
                    // there's no distinction between skipping several characters and skipping one
                    // so this doesn't need the length
                    Partial.SkipCharacter();
                    break;

                case ReaderStateMachine.AdvanceResult.Append_Character:
                    var length = i - consistentResultSince;
                    Partial.AppendCharacters(buffSpan, consistentResultSince, length);
                    break;

                default:
                    Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}");
                    break;
            }

            inBatchableResult = null;
            consistentResultSince = -1;
        }

        // inBatchableResult is always null here: if it was not null we either continued
        // (res == inBatchableResult) or just reset it (res != inBatchableResult).
        // So a skip or append result always starts a new run, and there is no
        // "already in a run" case to handle in this switch.
        switch (res)
        {
            case ReaderStateMachine.AdvanceResult.Skip_Character:
            case ReaderStateMachine.AdvanceResult.Append_Character:
                inBatchableResult = res;
                consistentResultSince = i;
                continue;

            case ReaderStateMachine.AdvanceResult.Finished_Value:
                PushPendingCharactersToValue();
                break;

            case ReaderStateMachine.AdvanceResult.Finished_Record:
                if (Partial.PendingCharsCount > 0)
                {
                    PushPendingCharactersToValue();
                }

                // everything after the current character is left for the next call
                unprocessedCharacters = bufferLen - i - 1;
                return true;

            case ReaderStateMachine.AdvanceResult.Exception_ExpectedEndOfRecord:
                Throw.InvalidOperationException($"Encountered '{c}' when expecting end of record");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_InvalidState:
                Throw.InvalidOperationException($"Internal state machine is in an invalid state due to a previous error");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_StartEscapeInValue:
                Throw.InvalidOperationException($"Encountered '{c}', starting an escaped value, when already in a value");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_UnexpectedCharacterInEscapeSequence:
                Throw.InvalidOperationException($"Encountered '{c}' in an escape sequence, which is invalid");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_UnexpectedLineEnding:
                Throw.Exception($"Unexpected {nameof(Cesil.RowEndings)} value encountered");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_UnexpectedState:
                Throw.Exception($"Unexpected state value entered");
                break;

            case ReaderStateMachine.AdvanceResult.Exception_ExpectedEndOfRecordOrValue:
                Throw.InvalidOperationException($"Encountered '{c}' when expecting the end of a record or value");
                break;

            default:
                Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {res}");
                break;
        }
    }

    // handle any batch that was still pending
    if (inBatchableResult != null)
    {
        switch (inBatchableResult.Value)
        {
            case ReaderStateMachine.AdvanceResult.Skip_Character:
                // there's no distinction between skipping several characters and skipping one
                // so this doesn't need the length
                Partial.SkipCharacter();
                break;

            case ReaderStateMachine.AdvanceResult.Append_Character:
                // we read all the way up to the end, so length needs to include the last character
                var length = bufferLen - consistentResultSince;
                Partial.AppendCharacters(buffSpan, consistentResultSince, length);
                break;

            default:
                Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}");
                break;
        }
    }

    unprocessedCharacters = 0;
    return false;
}