Ejemplo n.º 1
0
        /// <summary>
        /// Feeds the first <paramref name="bufferLen"/> characters of the buffer through the
        /// state machine, one character at a time, and applies each result to the partial
        /// record being built.
        ///
        /// Runs of consecutive skip results, and runs of consecutive append results, are
        /// applied with a single call once the run ends instead of once per character.
        /// </summary>
        /// <param name="bufferLen">Number of characters, counted from the start of the buffer, to examine.</param>
        /// <param name="unprocessedCharacters">
        /// Receives the number of characters at the end of the examined range that were not consumed:
        /// everything after the character that finished a record or comment, everything from the start
        /// of a potential multi-character separator that could not be fully checked, or 0 when the
        /// whole range was handled.
        /// </param>
        /// <returns>
        /// <c>HasValue</c> when a record was finished, <c>HasComment</c> when a comment was finished,
        /// and <c>NoValue</c> otherwise.
        /// </returns>
        private ReadWithCommentResultType ProcessBuffer(int bufferLen, out int unprocessedCharacters)
        {
            var chars = Buffer.Buffer.Span;

            // the kind of result currently being batched (only skips and appends are batched),
            //   or null when no batch is running
            ReaderStateMachine.AdvanceResult? pendingKind = null;

            // index of the first character in the running batch, -1 when there is none
            var pendingStart = -1;

            for (var i = 0; i < bufferLen; i++)
            {
                var current = chars[i];
                var outcome = StateMachine.Advance(current, false);

                // extra characters consumed by this iteration, beyond the current one
                var extraAdvance = 0;

                // only produced when the value separator is longer than one character
                if (outcome == ReaderStateMachine.AdvanceResult.LookAhead_MultiCharacterSeparator)
                {
                    var separatorLen = Configuration.ValueSeparatorMemory.Length;

                    if (bufferLen - i < separatorLen)
                    {
                        // not enough characters left to compare against the separator,
                        //   so flush whatever batch is running and ask for more data
                        if (pendingKind != null)
                        {
                            if (pendingKind == ReaderStateMachine.AdvanceResult.Skip_Character)
                            {
                                // skipping many characters is the same as skipping one, no length needed
                                Partial.SkipCharacter();
                            }
                            else if (pendingKind == ReaderStateMachine.AdvanceResult.Append_Character)
                            {
                                // the batch ends just before the current character
                                Partial.AppendCharacters(chars, pendingStart, i - pendingStart);
                            }
                            else
                            {
                                unprocessedCharacters = default;
                                Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {pendingKind.Value}", Configuration.Options);
                                return default;
                            }
                        }

                        // the current character, and everything after it, must be kept
                        unprocessedCharacters = bufferLen - i;
                        return ReadWithCommentResultType.NoValue;
                    }

                    var candidate = chars.Slice(i, separatorLen);
                    if (Utils.AreEqual(candidate, Configuration.ValueSeparatorMemory.Span))
                    {
                        // the whole separator is present: treat it as a value separator
                        //   and step over its remaining characters
                        outcome      = StateMachine.AdvanceValueSeparator();
                        extraAdvance = separatorLen - 1;
                    }
                    else
                    {
                        // just an ordinary character that happens to start like the separator
                        outcome = StateMachine.Advance(current, true);
                    }
                }

                // any running batch stops just before this index
                var batchEnd = i;

                // only non-zero if a multi-character separator was just consumed
                i += extraAdvance;

                if (pendingKind != null)
                {
                    if (outcome == pendingKind)
                    {
                        // same result as the running batch, keep extending it
                        continue;
                    }

                    // the batch has ended, apply it before dealing with the current result
                    if (pendingKind == ReaderStateMachine.AdvanceResult.Skip_Character)
                    {
                        // skipping many characters is the same as skipping one, no length needed
                        Partial.SkipCharacter();
                    }
                    else if (pendingKind == ReaderStateMachine.AdvanceResult.Append_Character)
                    {
                        Partial.AppendCharacters(chars, pendingStart, batchEnd - pendingStart);
                    }
                    else
                    {
                        unprocessedCharacters = default;
                        Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {pendingKind.Value}", Configuration.Options);
                        return default;
                    }

                    pendingKind  = null;
                    pendingStart = -1;
                }

                // no batch is running at this point: either there never was one, the
                //   loop continued above, or the batch was just flushed and cleared

                switch (outcome)
                {
                case ReaderStateMachine.AdvanceResult.Skip_Character:
                case ReaderStateMachine.AdvanceResult.Append_Character:
                    // begin a new batch at the current character
                    pendingKind  = outcome;
                    pendingStart = i;
                    break;

                case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndCurrentCharacter:
                    Partial.AppendCarriageReturn(chars);
                    Partial.AppendCharacters(chars, i, 1);
                    break;

                case ReaderStateMachine.AdvanceResult.Append_ValueSeparator:
                    Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                    break;

                case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator:
                    Partial.AppendCarriageReturn(chars);
                    Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                    break;

                // ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndEndComment cannot be reached here,
                //   because that only happens when the data ENDs

                case ReaderStateMachine.AdvanceResult.Finished_Unescaped_Value:
                case ReaderStateMachine.AdvanceResult.Finished_Escaped_Value:
                    PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_Escaped_Value);
                    break;

                case ReaderStateMachine.AdvanceResult.Finished_LastValueUnescaped_Record:
                case ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record:
                    if (Partial.PendingCharsCount > 0)
                    {
                        PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record);
                    }

                    // everything after the current character is left for the next call
                    unprocessedCharacters = bufferLen - i - 1;
                    return ReadWithCommentResultType.HasValue;

                case ReaderStateMachine.AdvanceResult.Finished_Comment:
                    unprocessedCharacters = bufferLen - i - 1;
                    return ReadWithCommentResultType.HasComment;

                default:
                    HandleUncommonAdvanceResults(outcome, current);
                    break;
                }
            }

            // the range ended while a batch was still running, apply it now
            if (pendingKind != null)
            {
                if (pendingKind == ReaderStateMachine.AdvanceResult.Skip_Character)
                {
                    // skipping many characters is the same as skipping one, no length needed
                    Partial.SkipCharacter();
                }
                else if (pendingKind == ReaderStateMachine.AdvanceResult.Append_Character)
                {
                    // the batch runs through the final character, so measure to the end of the range
                    Partial.AppendCharacters(chars, pendingStart, bufferLen - pendingStart);
                }
                else
                {
                    unprocessedCharacters = default;
                    Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {pendingKind.Value}", Configuration.Options);
                    return default;
                }
            }

            unprocessedCharacters = 0;
            return ReadWithCommentResultType.NoValue;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Searches the trie held in <c>Memory</c> for <paramref name="key"/>.
        ///
        /// The trie is walked one prefix group at a time: each group starts with a prefix
        /// count, followed by that many entries, each made of a prefix length, the prefix
        /// characters, and a single slot holding either a value or an offset to another group.
        /// </summary>
        /// <param name="key">The key to look for.</param>
        /// <param name="value">Receives the value stored for <paramref name="key"/> if found, -1 otherwise.</param>
        /// <returns><c>true</c> if <paramref name="key"/> is in the trie, <c>false</c> otherwise.</returns>
        private readonly unsafe bool TryLookupAdaptiveRadixTrie(string key, out int value)
        {
            AssertNotDisposedInternal(this);

            fixed (char* keyStart = key)
            {
                // these always describe the part of the key that has not been matched yet
                char* keyCursor    = keyStart;
                int   keyRemaining = key.Length;

                var trieChars = Memory.Span;

                fixed (char* trieStart = trieChars)
                {
                    // at the top of each pass this points at the _start_ of a prefix group
                    char* trieCursor = trieStart;

                    // each pass handles one prefix group, another pass is requested
                    //   whenever a matching prefix leads to a further group
                    var descend = true;
                    while (descend)
                    {
                        descend = false;

                        // when the remaining key is empty this reads 1 past its end, which
                        //   is fine because key is always a string AND .NET strings are always
                        //   null terminated (with a zero char, not just a zero byte)
                        var keyHead   = *keyCursor;
                        var groupSize = FromPrefixCount(*trieCursor);

                        // step over the prefix count
                        trieCursor++;

                        for (var slot = 0; slot < groupSize; slot++)
                        {
                            var prefixLen = FromPrefixLength(*trieCursor);

                            // step over the length; now pointing at the first prefix char,
                            //   or at the value/offset slot if the prefix is empty
                            trieCursor++;

                            // an exhausted key is only ever seen on the first entry of a group:
                            //   it matches if that entry is the empty prefix, and nothing else
                            if (keyRemaining == 0)
                            {
                                if (prefixLen != 0)
                                {
                                    value = -1;
                                    return false;
                                }

                                // the prefix is empty, so we're already pointing at its value
                                value = FromValue(*trieCursor);
                                return true;
                            }

                            // empty prefix, but there is still key left to match
                            if (prefixLen == 0)
                            {
                                // not a match, step over the value slot and try the next entry
                                trieCursor++;
                                continue;
                            }

                            var prefixHead = *trieCursor;

                            // this prefix sorts after the key, so no later one can match either
                            if (keyHead < prefixHead)
                            {
                                value = -1;
                                return false;
                            }

                            // this prefix sorts before the key, a later one might still match
                            if (keyHead > prefixHead)
                            {
                                trieCursor += prefixLen;
                                trieCursor++;    // step over the value/offset slot
                                continue;
                            }

                            // first chars agree, so the key must match this prefix in full

                            // the key ends part way through the prefix: something longer than
                            //   the key is in the trie, but the key itself is not
                            if (keyRemaining < prefixLen)
                            {
                                value = -1;
                                return false;
                            }

                            // the first char is already known to match, compare the rest.
                            //
                            // a mismatch means the key is absent: were it present, the prefix
                            //   would have been split after the last char they share
                            if (!Utils.AreEqual(prefixLen - 1, keyCursor + 1, trieCursor + 1))
                            {
                                value = -1;
                                return false;
                            }

                            // move onto the value/offset slot
                            trieCursor += prefixLen;

                            // what is left of the key once this prefix is consumed
                            var keyAfterPrefix = keyCursor + prefixLen;
                            var lenAfterPrefix = keyRemaining - prefixLen;

                            var slotContents = *trieCursor;

                            if (!IsOffset(slotContents))
                            {
                                // a value is only a hit if the key is completely used up,
                                //   otherwise the key is not in the trie
                                if (lenAfterPrefix != 0)
                                {
                                    value = -1;
                                    return false;
                                }

                                value = FromValue(slotContents);
                                return true;
                            }

                            // the slot is an offset (relative to the slot itself) to the group
                            //   that continues this prefix
                            var jump = FromOffset(slotContents);

                            // drop the matched part of the key...
                            keyCursor    = keyAfterPrefix;
                            keyRemaining = lenAfterPrefix;

                            // ...and carry on from the start of the next group
                            trieCursor = trieCursor + jump;
                            descend    = true;
                            break;
                        }
                    }
                }
            }

            // every prefix in the last group visited sorts before the key,
            //   so the key is not in the trie
            value = -1;
            return false;
        }