// Runs the first bufferLen characters of the read buffer through the state machine.
//
// Returns HasValue as soon as a record is finished, HasComment as soon as a comment is
// finished, and NoValue when the buffer is exhausted (or when more characters are needed
// to decide whether a multi-character value separator is present).
//
// unprocessedCharacters receives the count of characters, measured from the end of the
// inspected range, that were not consumed by this call.
private ReadWithCommentResultType ProcessBuffer(int bufferLen, out int unprocessedCharacters)
{
    var chars = Buffer.Buffer.Span;

    // consecutive skips / appends are collected into a "run" and handed to Partial in one
    // call, which saves on copying and on method calls
    //
    // runKind is the kind of the open run (null when there is none),
    // runStart is the index of the first character of that run (-1 when there is none)
    ReaderStateMachine.AdvanceResult? runKind = null;
    var runStart = -1;

    for (var pos = 0; pos < bufferLen; pos++)
    {
        var cur = chars[pos];
        var outcome = StateMachine.Advance(cur, false);

        // number of additional characters consumed by a multi-character separator
        var extraAdvance = 0;

        // only produced when the value separator is more than one character long
        if (outcome == ReaderStateMachine.AdvanceResult.LookAhead_MultiCharacterSeparator)
        {
            var sepLen = Configuration.ValueSeparatorMemory.Length;

            if (bufferLen - pos < sepLen)
            {
                // the buffer doesn't hold enough characters to compare against the
                // separator, so flush whatever run is open and ask for more data
                if (runKind != null)
                {
                    if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
                    {
                        // skipping many characters is the same as skipping one, no length needed
                        Partial.SkipCharacter();
                    }
                    else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
                    {
                        // the run covers everything from runStart up to (not including) pos
                        Partial.AppendCharacters(chars, runStart, pos - runStart);
                    }
                    else
                    {
                        unprocessedCharacters = default;
                        Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
                        return default;
                    }
                }

                // everything from the current character onwards has to be kept
                unprocessedCharacters = bufferLen - pos;
                return ReadWithCommentResultType.NoValue;
            }

            if (Utils.AreEqual(chars.Slice(pos, sepLen), Configuration.ValueSeparatorMemory.Span))
            {
                // it really is the separator: advance as a value separator and
                // move on to the separator's final character
                outcome = StateMachine.AdvanceValueSeparator();
                extraAdvance = sepLen - 1;
            }
            else
            {
                // not the separator after all, re-advance with the second argument set to true
                outcome = StateMachine.Advance(cur, true);
            }
        }

        // an open run ends just before the character we started this iteration on
        var runEnd = pos;

        // step over the remainder of a multi-character separator, if there was one
        pos += extraAdvance;

        if (runKind != null)
        {
            if (outcome == runKind)
            {
                // same kind as the open run, just extend it
                continue;
            }

            // the run is over, hand it off
            if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
            {
                // skipping many characters is the same as skipping one, no length needed
                Partial.SkipCharacter();
            }
            else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
            {
                Partial.AppendCharacters(chars, runStart, runEnd - runStart);
            }
            else
            {
                unprocessedCharacters = default;
                Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
                return default;
            }

            runKind = null;
            runStart = -1;

            // carry on below to deal with the current character
        }

        // no run is ever open at this point: an open run was either extended (and we
        // continued) or flushed and cleared just above
        switch (outcome)
        {
            case ReaderStateMachine.AdvanceResult.Skip_Character:
            case ReaderStateMachine.AdvanceResult.Append_Character:
                // open a new run starting at this character
                runKind = outcome;
                runStart = pos;
                continue;

            case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndCurrentCharacter:
                Partial.AppendCarriageReturn(chars);
                Partial.AppendCharacters(chars, pos, 1);
                break;

            case ReaderStateMachine.AdvanceResult.Append_ValueSeparator:
                Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                break;

            case ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndValueSeparator:
                Partial.AppendCarriageReturn(chars);
                Partial.AppendCharactersFromDifferentBuffer(chars, Configuration.ValueSeparatorMemory.Span);
                break;

            // ReaderStateMachine.AdvanceResult.Append_CarriageReturnAndEndComment cannot be
            // reached here, because that only happens when the data ENDs

            case ReaderStateMachine.AdvanceResult.Finished_Unescaped_Value:
            case ReaderStateMachine.AdvanceResult.Finished_Escaped_Value:
                PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_Escaped_Value);
                break;

            case ReaderStateMachine.AdvanceResult.Finished_LastValueUnescaped_Record:
            case ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record:
                if (Partial.PendingCharsCount > 0)
                {
                    PushPendingCharactersToValue(outcome == ReaderStateMachine.AdvanceResult.Finished_LastValueEscaped_Record);
                }

                // the current character is consumed, everything after it is left over
                unprocessedCharacters = bufferLen - pos - 1;
                return ReadWithCommentResultType.HasValue;

            case ReaderStateMachine.AdvanceResult.Finished_Comment:
                unprocessedCharacters = bufferLen - pos - 1;
                return ReadWithCommentResultType.HasComment;

            default:
                HandleUncommonAdvanceResults(outcome, cur);
                break;
        }
    }

    // the buffer ran out while a run was still open, so flush it
    if (runKind != null)
    {
        if (runKind.Value == ReaderStateMachine.AdvanceResult.Skip_Character)
        {
            // skipping many characters is the same as skipping one, no length needed
            Partial.SkipCharacter();
        }
        else if (runKind.Value == ReaderStateMachine.AdvanceResult.Append_Character)
        {
            // the run extends to the very end, so the last character is included
            Partial.AppendCharacters(chars, runStart, bufferLen - runStart);
        }
        else
        {
            unprocessedCharacters = default;
            Throw.ImpossibleException($"Unexpected {nameof(ReaderStateMachine.AdvanceResult)}: {runKind.Value}", Configuration.Options);
            return default;
        }
    }

    unprocessedCharacters = 0;
    return ReadWithCommentResultType.NoValue;
}
// Searches the trie held in Memory for key.
//
// Returns true and stores the decoded value in value when key is present;
// otherwise returns false and sets value to -1.
//
// The trie is walked one prefix group at a time. Within a group each entry is laid out as
// a length slot, the prefix characters, and then a slot that holds either a value or an
// offset to a child prefix group.
private readonly unsafe bool TryLookupAdaptiveRadixTrie(string key, out int value)
{
    AssertNotDisposedInternal(this);

    fixed (char* keyStart = key)
    {
        // keyCursor / keyRemaining always describe the part of key that has not been matched yet
        char* keyCursor = keyStart;
        int keyRemaining = key.Length;

        var trieChars = Memory.Span;

        fixed (char* trieStart = trieChars)
        {
            // at the top of every pass trieCursor points at the _start_ of a prefix group
            char* trieCursor = trieStart;

            // set when a matching prefix leads to a child group that must be processed next
            bool descend;

            do
            {
                descend = false;

                // when the remaining key is empty this reads one char past its end, which is
                // fine because key is a string and .NET strings are always terminated with a
                // zero char
                var keyHead = *keyCursor;

                var groupSize = FromPrefixCount(*trieCursor);

                // step over the prefix count
                trieCursor++;

                for (var n = 0; n < groupSize; n++)
                {
                    var prefixLen = FromPrefixLength(*trieCursor);

                    // step over the length; we now point at the first prefix character, or at
                    // the value/offset slot when the prefix is empty
                    trieCursor++;

                    // per the original author's note the remaining key can only be empty on
                    // the first entry of a group: accept the value if that entry's prefix is
                    // "", otherwise the key is not present
                    if (keyRemaining == 0)
                    {
                        if (prefixLen != 0)
                        {
                            value = -1;
                            return false;
                        }

                        // an empty prefix means we're already pointing at the value
                        value = FromValue(*trieCursor);
                        return true;
                    }

                    // empty prefix but key still has characters: step over the value slot and
                    // try the next entry
                    if (prefixLen == 0)
                    {
                        trieCursor++;
                        continue;
                    }

                    var prefixHead = *trieCursor;

                    // this prefix sorts after key, so no later entry in the group can match
                    if (keyHead < prefixHead)
                    {
                        value = -1;
                        return false;
                    }

                    // this prefix sorts before key: step over the prefix characters and the
                    // value/offset slot, then try the next entry
                    if (keyHead > prefixHead)
                    {
                        trieCursor = trieCursor + prefixLen + 1;
                        continue;
                    }

                    // first characters agree, so key has to match the whole prefix.
                    //
                    // a key shorter than the prefix means only some longer string
                    // (key + "<some other chars>") is stored, not key itself
                    if (keyRemaining < prefixLen)
                    {
                        value = -1;
                        return false;
                    }

                    // the first character is already known to match, compare the rest.
                    // a mismatch means key isn't stored: if it were, the prefix would have
                    // been split after the last common character
                    if (!Utils.AreEqual(prefixLen - 1, keyCursor + 1, trieCursor + 1))
                    {
                        value = -1;
                        return false;
                    }

                    // move onto the value/offset slot
                    trieCursor += prefixLen;

                    var slot = *trieCursor;

                    // how much of key is left once this prefix is consumed
                    var keyAfterPrefix = keyRemaining - prefixLen;

                    if (!IsOffset(slot))
                    {
                        // a value only counts if key has been consumed completely
                        if (keyAfterPrefix == 0)
                        {
                            value = FromValue(slot);
                            return true;
                        }

                        value = -1;
                        return false;
                    }

                    // the slot is an offset: jump to the child group, drop the matched part
                    // of key, and start over on that group
                    trieCursor = trieCursor + FromOffset(slot);
                    keyCursor = keyCursor + prefixLen;
                    keyRemaining = keyAfterPrefix;

                    descend = true;
                    break;
                }
            }
            while (descend);
        }
    }

    // every prefix in the group was examined and key sorts after all of them
    // lexicographically, so it cannot be in the trie
    value = -1;
    return false;
}