예제 #1
0
        /// <summary>
        /// Feeds the first <paramref name="bufferLen"/> characters of the buffer through the state machine,
        /// one character at a time, acting on each result it reports.
        ///
        /// Consecutive skip results and consecutive append results are accumulated into a "run" so that
        /// <c>Partial</c> is called once per run rather than once per character.
        /// </summary>
        /// <param name="bufferLen">How many characters of the buffer to examine.</param>
        /// <param name="unprocessedCharacters">
        /// Set to the number of characters at the end of the buffer that this call did not consume.
        /// </param>
        /// <returns>
        /// <c>HasValue</c> when a record was finished, <c>HasComment</c> when a comment was finished,
        /// and <c>NoValue</c> when the buffer ran out (or more characters are required to decide whether
        /// a multi-character value separator is present).
        /// </returns>
        private ReadWithCommentResultType ProcessBuffer(int bufferLen, out int unprocessedCharacters)
        {
            var chars = Buffer.Buffer.Span;

            // kind of the run being accumulated (skip or append), and the index at which it started
            ReaderStateMachine.AdvanceResult? runKind = null;
            var runStart = -1;

            for (var pos = 0; pos < bufferLen; pos++)
            {
                var current = chars[pos];
                var outcome = StateMachine.Advance(current, false);

                // extra characters to step over once a multi-character separator has been matched
                var extraAdvance = 0;

                // only reported when the value separator is longer than one character
                if (outcome == ReaderStateMachine.AdvanceResult.LookAhead_MultiCharacterSeparator)
                {
                    var separatorLength = Configuration.ValueSeparatorMemory.Length;

                    if (bufferLen - pos < separatorLength)
                    {
                        // too few characters left to compare against the separator:
                        //   flush whatever run is open, then hand everything from pos onwards back
                        if (runKind != null)
                        {
                            if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
                            {
                                // skipping many characters is the same as skipping one, no length required
                                Partial.SkipCharacter();
                            }
                            else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
                            {
                                Partial.AppendCharacters(chars, runStart, pos - runStart);
                            }
                            else
                            {
                                unprocessedCharacters = default;
                                Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
                                return default;
                            }
                        }

                        unprocessedCharacters = bufferLen - pos;
                        return ReadWithCommentResultType.NoValue;
                    }

                    var candidate = chars.Slice(pos, separatorLength);
                    if (Utils.AreEqual(candidate, Configuration.ValueSeparatorMemory.Span))
                    {
                        // the whole separator is present, so treat it as one and
                        //   move on to its final character
                        outcome      = StateMachine.AdvanceValueSeparator();
                        extraAdvance = separatorLength - 1;
                    }
                    else
                    {
                        // not a separator after all, re-advance with the second argument set to true
                        outcome = StateMachine.Advance(current, true);
                    }
                }

                // an open run ends just before the character that produced the current result
                var runEnd = pos;

                // step past the remainder of a matched multi-character separator (no-op otherwise)
                pos += extraAdvance;

                if (runKind != null)
                {
                    if (outcome == runKind.Value)
                    {
                        // same kind of result, the run simply keeps growing
                        continue;
                    }

                    // the run is over, hand it off in a single call
                    if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
                    {
                        // skipping many characters is the same as skipping one, no length required
                        Partial.SkipCharacter();
                    }
                    else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
                    {
                        Partial.AppendCharacters(chars, runStart, runEnd - runStart);
                    }
                    else
                    {
                        unprocessedCharacters = default;
                        Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
                        return default;
                    }

                    runKind  = null;
                    runStart = -1;

                    // carry on below to deal with the current result
                }

                // no run is open at this point: an open run either hit the continue above
                //   or was flushed and cleared, so the switch never has to consider one

                switch (outcome)
                {
                    case ReaderStateMachine.AdvanceResult.Skip_Character:
                    case ReaderStateMachine.AdvanceResult.Append_Character:
                        // open a new run of this kind, it gets flushed once the result changes
                        runKind  = outcome;
                        runStart = pos;
                        continue;

                    case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndCurrentCharacter:
                        Partial.AppendCarriageReturn(chars);
                        Partial.AppendCharacters(chars, pos, 1);
                        break;

                    case ReaderStateMachine.AdvanceResult.Append_ValueSeparator:
                    case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator:
                        if (outcome == ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator)
                        {
                            Partial.AppendCarriageReturn(chars);
                        }

                        Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                        break;

                    // ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndEndComment is not handled here,
                    //   because that only happens when the data ENDs

                    case ReaderStateMachine.AdvanceResult.Finished_Unescaped_Value:
                    case ReaderStateMachine.AdvanceResult.Finished_Escaped_Value:
                        PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_Escaped_Value);
                        break;

                    case ReaderStateMachine.AdvanceResult.Finished_LastValueUnescaped_Record:
                    case ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record:
                        if (Partial.PendingCharsCount > 0)
                        {
                            PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record);
                        }

                        // everything after the current character is left for the next call
                        unprocessedCharacters = bufferLen - pos - 1;
                        return ReadWithCommentResultType.HasValue;

                    case ReaderStateMachine.AdvanceResult.Finished_Comment:
                        unprocessedCharacters = bufferLen - pos - 1;
                        return ReadWithCommentResultType.HasComment;

                    default:
                        HandleUncommonAdvanceResults(outcome, current);
                        break;
                }
            }

            // the buffer ended while a run was still open, flush it
            if (runKind != null)
            {
                if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
                {
                    // skipping many characters is the same as skipping one, no length required
                    Partial.SkipCharacter();
                }
                else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
                {
                    // the run extends to the end of the buffer, so the last character is included
                    Partial.AppendCharacters(chars, runStart, bufferLen - runStart);
                }
                else
                {
                    unprocessedCharacters = default;
                    Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
                    return default;
                }
            }

            unprocessedCharacters = 0;
            return ReadWithCommentResultType.NoValue;
        }
예제 #2
0
        /// <summary>
        /// Feeds the first <paramref name="bufferLen"/> characters of the buffer through the state machine,
        /// one character at a time, acting on each result it reports.
        ///
        /// Consecutive skip results and consecutive append results are batched so that
        /// <c>Partial</c> is called once per batch rather than once per character.
        /// </summary>
        /// <param name="bufferLen">How many characters of the buffer to examine.</param>
        /// <param name="unprocessedCharacters">
        /// Set to the number of characters after the one that finished a record, or to 0
        /// when the whole buffer was consumed without finishing a record.
        /// </param>
        /// <returns>
        /// <c>true</c> if a record was finished, <c>false</c> if the buffer was exhausted first.
        /// </returns>
        private bool ProcessBuffer(int bufferLen, out int unprocessedCharacters)
        {
            var buffSpan = Buffer.Buffer.Span;

            // the kind of batch (skip or append) currently open, and the index at which it started
            ReaderStateMachine.AdvanceResult? inBatchableResult = null;
            var consistentResultSince = -1;

            for (var i = 0; i < bufferLen; i++)
            {
                var c   = buffSpan[i];
                var res = StateMachine.Advance(c);

                // try and batch skips and appends
                //   to save time on copying AND on
                //   basically pointless method calls
                if (inBatchableResult != null)
                {
                    if (res == inBatchableResult)
                    {
                        // same kind of result, the batch keeps growing
                        continue;
                    }

                    // the batch ended just before the current character, flush it
                    switch (inBatchableResult.Value)
                    {
                        case ReaderStateMachine.AdvanceResult.Skip_Character:

                            // there's no distinction between skipping several characters and skipping one
                            //    so this doesn't need the length
                            Partial.SkipCharacter();
                            break;

                        case ReaderStateMachine.AdvanceResult.Append_Character:
                            var length = i - consistentResultSince;

                            Partial.AppendCharacters(buffSpan, consistentResultSince, length);
                            break;

                        default:
                            Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}");
                            break;
                    }

                    inBatchableResult     = null;
                    consistentResultSince = -1;

                    // fall through into the switch to handle the current character
                }

                // inBatchableResult is always null here
                //   because if it's NOT null we either continue (if res == inBatchableResult),
                //   thereby not hitting this point, or set it to null (if res != inBatchableResult)
                // this means the batchable cases below always start a new batch and never
                //   need a per-character fallback

                switch (res)
                {
                    case ReaderStateMachine.AdvanceResult.Skip_Character:
                        inBatchableResult     = ReaderStateMachine.AdvanceResult.Skip_Character;
                        consistentResultSince = i;
                        continue;

                    case ReaderStateMachine.AdvanceResult.Append_Character:
                        inBatchableResult     = ReaderStateMachine.AdvanceResult.Append_Character;
                        consistentResultSince = i;
                        continue;

                    case ReaderStateMachine.AdvanceResult.Finished_Value:
                        PushPendingCharactersToValue();
                        break;

                    case ReaderStateMachine.AdvanceResult.Finished_Record:
                        if (Partial.PendingCharsCount > 0)
                        {
                            PushPendingCharactersToValue();
                        }

                        // everything after the current character is left for the next call
                        unprocessedCharacters = bufferLen - i - 1;
                        return(true);

                    case ReaderStateMachine.AdvanceResult.Exception_ExpectedEndOfRecord:
                        Throw.InvalidOperationException($"Encountered '{c}' when expecting end of record");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_InvalidState:
                        Throw.InvalidOperationException($"Internal state machine is in an invalid state due to a previous error");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_StartEscapeInValue:
                        Throw.InvalidOperationException($"Encountered '{c}', starting an escaped value, when already in a value");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_UnexpectedCharacterInEscapeSequence:
                        Throw.InvalidOperationException($"Encountered '{c}' in an escape sequence, which is invalid");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_UnexpectedLineEnding:
                        Throw.Exception($"Unexpected {nameof(Cesil.RowEndings)} value encountered");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_UnexpectedState:
                        Throw.Exception($"Unexpected state value entered");
                        break;

                    case ReaderStateMachine.AdvanceResult.Exception_ExpectedEndOfRecordOrValue:
                        Throw.InvalidOperationException($"Encountered '{c}' when expecting the end of a record or value");
                        break;

                    default:
                        Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {res}");
                        break;
                }
            }

            // handle any batch that was still pending
            if (inBatchableResult != null)
            {
                switch (inBatchableResult.Value)
                {
                    case ReaderStateMachine.AdvanceResult.Skip_Character:

                        // there's no distinction between skipping several characters and skipping one
                        //    so this doesn't need the length
                        Partial.SkipCharacter();
                        break;

                    case ReaderStateMachine.AdvanceResult.Append_Character:
                        // the batch runs to the end of the buffer, so length needs to include the last character
                        var length = bufferLen - consistentResultSince;

                        Partial.AppendCharacters(buffSpan, consistentResultSince, length);
                        break;

                    default:
                        Throw.Exception($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {inBatchableResult.Value}");
                        break;
                }
            }

            unprocessedCharacters = 0;
            return(false);
        }