/// <summary>
/// Attempts to decode the next character from the bytes of given <paramref name="stream"/>.
/// </summary>
/// <param name="stream">The <see cref="StreamReaderWithResizableBuffer"/> supplying the bytes. Bytes are fetched via <see cref="StreamReaderWithResizableBuffer.TryReadMoreAsync(int)"/>.</param>
/// <param name="charChecker">Optional filter for the decoded characters. When given, characters are read one after another until the filter returns <c>true</c> for one of them; that character is the result.</param>
/// <returns>A task which will return the accepted character, or <c>null</c> if <paramref name="stream"/> ran out of data before a character was accepted.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="stream"/> is <c>null</c>.</exception>
/// <exception cref="InvalidOperationException">If this reader is currently busy with another read operation.</exception>
public async ValueTask<Char?> TryReadNextCharacterAsync(
   StreamReaderWithResizableBuffer stream,
   Func<Char, Boolean> charChecker = null // If false -> will read next character
)
{
   ArgumentValidator.ValidateNotNull(nameof(stream), stream);

   // Only one read operation at a time: atomically flip IDLE -> BUSY, or give up.
   if (Interlocked.CompareExchange(ref this._state, BUSY, IDLE) != IDLE)
   {
      throw BusyException();
   }

   try
   {
      var textEncoding = this.Encoding.Encoding;
      var decodedChars = this._chars;
      var readStep = this._minChar;

      while (true)
      {
         var firstByteIdx = stream.ReadBytesCount;
         if (!await stream.TryReadMoreAsync(readStep))
         {
            // Nothing more to read.
            return null;
         }

         // Keep pulling bytes for as long as everything read since firstByteIdx still decodes to one character.
         var decodedCount = 1;
         while (decodedCount == 1 && await stream.TryReadMoreAsync(readStep))
         {
            decodedCount = textEncoding.GetCharCount(stream.Buffer, firstByteIdx, stream.ReadBytesCount - firstByteIdx);
         }

         if (decodedCount > 1)
         {
            // The last step peeked past the current character - hand those bytes back.
            stream.UnreadBytes(readStep);
         }

         textEncoding.GetChars(stream.Buffer, firstByteIdx, stream.ReadBytesCount - firstByteIdx, decodedChars, 0);

         // Without a checker the first decoded character is accepted right away.
         if (charChecker == null || charChecker(decodedChars[0]))
         {
            return decodedChars[0];
         }
      }
   }
   finally
   {
      Interlocked.Exchange(ref this._state, IDLE);
   }
}
/// <summary>
/// Reads one JSON token from given <paramref name="stream"/>: an array, an object, a string, <c>true</c>, <c>false</c>, <c>null</c>, or a number.
/// Arrays and objects are read by calling this method recursively for their values.
/// </summary>
/// <param name="stream">The <see cref="StreamReaderWithResizableBuffer"/> to read bytes from. Its already-read bytes are erased from the buffer at the start of this method.</param>
/// <param name="reader">The <see cref="CharacterReader"/> used to decode characters from <paramref name="stream"/>.</param>
/// <returns>A task which will return the token that was read, or <c>null</c> if the first non-whitespace character was an array end or an object end character.</returns>
private static async ValueTask<JToken> PerformReadJSONTTokenAsync(
   StreamReaderWithResizableBuffer stream,
   CharacterReader reader
)
{
   stream.EraseReadBytesFromBuffer();

   // Read first non-whitespace character, remembering the buffer index where it starts (needed by number parsing).
   Char charRead;
   Int32 prevIdx;
   do
   {
      prevIdx = stream.ReadBytesCount;
      charRead = await reader.ReadNextCharacterAsync(stream);
   } while (Char.IsWhiteSpace(charRead));

   // We know what kind of JToken we will have based on a single character
   JToken retVal;
   Boolean encounteredContainerEnd;
   var encoding = reader.Encoding;
   switch (charRead)
   {
      case ARRAY_END:
      case OBJ_END:
         // This happens only when reading empty array/object, and this is called recursively.
         retVal = null;
         break;

      case ARRAY_START:
         var array = new JArray();
         encounteredContainerEnd = false;
         JToken element;
         while (!encounteredContainerEnd && (element = await PerformReadJSONTTokenAsync(stream, reader)) != null)
         {
            array.Add(element);
            // Read next non-whitespace character - it will be either array value delimiter (',') or array end (']')
            charRead = await reader.ReadNextCharacterAsync(stream, c => !Char.IsWhiteSpace(c));
            encounteredContainerEnd = charRead == ARRAY_END;
         }
         retVal = array;
         break;

      case OBJ_START:
         var obj = new JObject();
         encounteredContainerEnd = false;
         String keyStr;
         // The key reader returns null when the next non-whitespace character is not a string start (e.g. empty object).
         while (!encounteredContainerEnd && (keyStr = await ReadJSONStringAsync(reader, stream, false)) != null)
         {
            // Skip whitespace and ':'
            charRead = await reader.ReadNextCharacterAsync(stream, c => !Char.IsWhiteSpace(c) && c != OBJ_KEY_VALUE_DELIM);
            // Unread the first character of the value, so that the recursive call sees it
            stream.UnreadBytes(reader.GetByteCount(charRead));
            // Read another JToken, this one will be our value
            obj.Add(keyStr, await PerformReadJSONTTokenAsync(stream, reader));
            // Read next non-whitespace character - it will be either object value delimiter (','), or object end ('}')
            charRead = await reader.ReadNextCharacterAsync(stream, c => !Char.IsWhiteSpace(c));
            encounteredContainerEnd = charRead == OBJ_END;
         }
         retVal = obj;
         break;

      case STR_START:
         retVal = new JValue(await ReadJSONStringAsync(reader, stream, true));
         break;

      case 't':
         // Boolean true - the rest must be 'r', 'u', 'e'
         Validate(await reader.ReadNextCharacterAsync(stream), 'r');
         Validate(await reader.ReadNextCharacterAsync(stream), 'u');
         Validate(await reader.ReadNextCharacterAsync(stream), 'e');
         retVal = new JValue(true);
         break;

      case 'f':
         // Boolean false - the rest must be 'a', 'l', 's', 'e'
         Validate(await reader.ReadNextCharacterAsync(stream), 'a');
         Validate(await reader.ReadNextCharacterAsync(stream), 'l');
         Validate(await reader.ReadNextCharacterAsync(stream), 's');
         Validate(await reader.ReadNextCharacterAsync(stream), 'e');
         retVal = new JValue(false);
         break;

      case 'n':
         // null - the rest must be 'u', 'l', 'l'
         Validate(await reader.ReadNextCharacterAsync(stream), 'u');
         Validate(await reader.ReadNextCharacterAsync(stream), 'l');
         Validate(await reader.ReadNextCharacterAsync(stream), 'l');
         retVal = JValue.CreateNull();
         break;

      default:
         // The only possibility is number - or malformed JSON string
         // Read until first non-number-char
         var lastReadChar = await reader.TryReadNextCharacterAsync(stream, c =>
         {
            // TODO this isn't strictly according to spec... But will do for now.
            switch (c)
            {
               case '-': // Plus is ok after E/e, Minus is ok at beginning of number, and after E/e
               case '+':
               case '0':
               case '1':
               case '2':
               case '3':
               case '4':
               case '5':
               case '6':
               case '7':
               case '8':
               case '9':
               case (Char)NUMBER_DECIMAL:
               case (Char)NUMBER_EXP_LOW:
               case (Char)NUMBER_EXP_UPPER:
                  return false;
               default:
                  return true;
            }
         });

         // Unread previously read character, unless we arrived at the end.
         if (lastReadChar.HasValue)
         {
            stream.UnreadBytes(reader.GetByteCount(lastReadChar.Value));
         }

         // If we have '.' or 'e' or 'E' then it is non-integer.
         // An explicit flag is used instead of comparing the index against the end: ReadASCIIByte advances the index
         // past the byte it returns, so a marker located in the very last byte would otherwise go unnoticed.
         var curIdx = prevIdx;
         var endIdx = stream.ReadBytesCount;
         var isInteger = true;
         while (isInteger && curIdx < endIdx)
         {
            var curASCIIByte = encoding.ReadASCIIByte(stream.Buffer, ref curIdx);
            isInteger = curASCIIByte != NUMBER_DECIMAL
               && curASCIIByte != NUMBER_EXP_LOW
               && curASCIIByte != NUMBER_EXP_UPPER;
         }

         var byteCount = endIdx - prevIdx;
         // TODO maybe use Decimal always for non-integers? PgSQL seems to use NUMERIC for non-integers.
         if (isInteger)
         {
            retVal = new JValue(encoding.ParseInt64Textual(stream.Buffer, ref prevIdx, (byteCount / encoding.BytesPerASCIICharacter, true)));
         }
         else
         {
            retVal = new JValue(Double.Parse(encoding.Encoding.GetString(stream.Buffer, prevIdx, byteCount), System.Globalization.CultureInfo.InvariantCulture.NumberFormat));
         }
         break;
   }

   return retVal;
}
/// <summary>
/// Reads a JSON string from given <paramref name="stream"/>. Escape sequences are resolved in place, inside the buffer of the stream, and the resulting bytes are then decoded into a string.
/// </summary>
/// <param name="reader">The <see cref="CharacterReader"/> used to read characters from <paramref name="stream"/>, and to supply the encoding.</param>
/// <param name="stream">The <see cref="StreamReaderWithResizableBuffer"/> to read bytes from.</param>
/// <param name="startQuoteRead">Whether the string start character has already been consumed. If <c>false</c>, the read bytes are erased from the buffer, and the next non-whitespace character is read and checked to be the string start character.</param>
/// <returns>A task which will return the contents of the string, or <c>null</c> if <paramref name="startQuoteRead"/> was <c>false</c> and the next non-whitespace character was not the string start character.</returns>
private static async ValueTask<String> ReadJSONStringAsync(
   CharacterReader reader,
   StreamReaderWithResizableBuffer stream,
   Boolean startQuoteRead
)
{
   var quoteSeen = startQuoteRead;
   if (!startQuoteRead)
   {
      stream.EraseReadBytesFromBuffer();
      var firstNonWhitespace = await reader.ReadNextCharacterAsync(stream, c => !Char.IsWhiteSpace(c));
      quoteSeen = firstNonWhitespace == STR_START;
   }

   var encodingInfo = reader.Encoding;
   var textEncoding = encodingInfo.Encoding;
   if (!quoteSeen)
   {
      return null;
   }

   // At this point, we have read the starting quote, now read the contents.
   var asciiSize = encodingInfo.BytesPerASCIICharacter;
   var contentStartIdx = stream.ReadBytesCount;

   // Reads the four hexadecimal digits of a '\uXXXX' escape and combines them into one code unit.
   async ValueTask<Int32> ReadFourHexDigits()
   {
      var hexIdx = stream.ReadBytesCount;
      await stream.ReadMoreOrThrow(4 * asciiSize);
      var upperPart = encodingInfo.ReadHexDecimal(stream.Buffer, ref hexIdx);
      var lowerPart = encodingInfo.ReadHexDecimal(stream.Buffer, ref hexIdx);
      return (upperPart << 8) | lowerPart;
   }

   // Read string, but mind the escapes
   Char current;
   do
   {
      // Index of the character about to be read; an escape sequence starting here gets overwritten from this index on.
      var writeIdx = stream.ReadBytesCount;
      current = await reader.TryReadNextCharacterAsync(stream) ?? STR_END;
      if (current != STR_ESCAPE_PREFIX)
      {
         // Ordinary character (or end of string) - nothing to rewrite.
         continue;
      }

      // Escape handling - next character decides what we will do
      var escapeKind = await reader.TryReadNextCharacterAsync(stream) ?? STR_END;
      Byte replacementByte = 0;
      switch (escapeKind)
      {
         case STR_END:
         case STR_ESCAPE_PREFIX:
         case '/':
            // Actual value is the character itself, without the preceding '\'
            replacementByte = (Byte)escapeKind;
            break;
         case 'b':
            replacementByte = (Byte)'\b'; // Backspace
            break;
         case 'f':
            replacementByte = (Byte)'\f'; // Form feed
            break;
         case 'n':
            replacementByte = (Byte)'\n'; // New line
            break;
         case 'r':
            replacementByte = (Byte)'\r'; // Carriage return
            break;
         case 't':
            replacementByte = (Byte)'\t'; // Horizontal tab
            break;
         case 'u':
            // Unicode sequence - followed by four hexadecimal digits
            var code = await ReadFourHexDigits();
            if (code >= Char.MinValue && code <= Char.MaxValue && Char.IsSurrogate((Char)code))
            {
               var firstSurrogate = (Char)code;
               var idxAfterFirstEscape = stream.ReadBytesCount;

               // A surrogate should be followed by a second '\uXXXX' escape - look ahead for '\' and then 'u'.
               var lookahead = await reader.TryReadNextCharacterAsync(stream);
               var secondEscapeFollows = lookahead.HasValue && lookahead.Value == STR_ESCAPE_PREFIX;
               if (secondEscapeFollows)
               {
                  lookahead = await reader.TryReadNextCharacterAsync(stream);
                  secondEscapeFollows = lookahead.HasValue && lookahead.Value == 'u';
               }

               if (secondEscapeFollows)
               {
                  var secondCode = await ReadFourHexDigits();
                  reader.GetBytes(firstSurrogate, (Char)secondCode, stream.Buffer, ref writeIdx);
               }
               else
               {
                  // Orphaned surrogate character - give back whatever the look-ahead consumed
                  stream.UnreadBytes(stream.ReadBytesCount - idxAfterFirstEscape);
                  reader.GetBytes(firstSurrogate, stream.Buffer, ref writeIdx);
               }
            }
            else
            {
               // Overwrite '\uXXXX' with actual character
               var decodedText = Char.ConvertFromUtf32(code);
               writeIdx += textEncoding.GetBytes(decodedText, 0, decodedText.Length, stream.Buffer, writeIdx);
            }
            break;
         default:
            // Unknown escape - leave the bytes as they are
            writeIdx = stream.ReadBytesCount;
            break;
      }

      if (replacementByte > 0)
      {
         // We just read ASCII char, which should be now replaced
         encodingInfo.WriteASCIIByte(stream.Buffer, ref writeIdx, replacementByte);
      }

      // Erase anything extra between the rewritten bytes and the current read position
      stream.EraseReadBufferSegment(writeIdx, stream.ReadBytesCount - writeIdx);

      // Always read next char: an escaped quote must not terminate the loop
      current = (Char)0;
   } while (current != STR_END);

   // The size of one ASCII character is subtracted to leave out the closing quote.
   // NOTE(review): when the stream ends without a closing quote, this subtraction still happens - confirm whether that is intended.
   var contentByteCount = stream.ReadBytesCount - contentStartIdx - asciiSize;
   return contentByteCount <= 0
      ? ""
      : textEncoding.GetString(stream.Buffer, contentStartIdx, contentByteCount);
}