/// <summary>
/// Reads a JSON string token one character at a time and returns its decoded content
/// (without the surrounding double quotes).
/// </summary>
/// <remarks>
/// The opening quote is optional here: a caller such as ReadStringWithLookAhead() may have
/// consumed it already, so it is skipped only when it is the next character in the stream.
/// If the stream ends (PeekChar() yields 0) before a closing quote is seen, the characters
/// collected so far are returned and no error is raised.
/// </remarks>
/// <returns>The unescaped string content.</returns>
/// <exception cref="DotJsonMiniException">
/// The stream is already at its end, a \u escape cannot be decoded, or a backslash is
/// followed by a character that Symbols.IsEscapableChar() rejects.
/// </exception>
private string ReadString()
{
    char quote = (char)CharSymbol.DQUOTE;
    StringBuilder text = new StringBuilder();

    char first = PeekChar();
    if (first == 0)
    {
        // Nothing left to read at all.
        throw new DotJsonMiniException("Expecting String. Invalid token encountered: c = " + first);
    }
    if (first == quote)
    {
        // Drop the opening quote; it is not part of the value.
        SkipCharNoCheck();
    }
    // Otherwise the opening quote was consumed by the caller and we are already inside the string.

    bool inEscape = false;
    char upcoming = PeekChar();
    while (upcoming != 0 && (inEscape || upcoming != quote))
    {
        char current = NextCharNoCheck();
        if (!inEscape)
        {
            if (current == (char)CharSymbol.BACKSLASH)
            {
                // Start of an escape sequence; the backslash itself is not emitted.
                inEscape = true;
            }
            else
            {
                // TBD (original note): control characters are not filtered out.
                text.Append(current);
            }
        }
        else
        {
            if (current == (char)CharSymbol.UNICODE_PREFIX)
            {
                // "\uXXXX": pull the four characters that follow the prefix from the queue.
                CyclicCharArray hex = NextCharsInQueue(4);
                try
                {
                    char decoded = UnicodeUtil.GetUnicodeChar(hex);
                    if (decoded != 0)
                    {
                        text.Append(decoded);
                    }
                    // A decoded value of 0 is dropped without any error.
                }
                catch (Exception ex)
                {
                    throw new DotJsonMiniException("Invalid unicode char: hex = " + hex.ToString(), ex);
                }
            }
            else if (Symbols.IsEscapableChar(current))
            {
                // TBD (original note): newlines should not be allowed within a string.
                char unescaped = Symbols.GetEscapedChar(current);
                if (unescaped != 0)
                {
                    text.Append(unescaped);
                }
            }
            else
            {
                throw new DotJsonMiniException("Invalid escaped char: d = \\" + current);
            }

            // The escape sequence has been handled completely.
            inEscape = false;
        }

        upcoming = PeekChar();
    }

    if (upcoming == quote)
    {
        // Consume the closing quote without adding it to the result.
        SkipCharNoCheck();
    }
    // else: the input ended inside the string; the partial content is returned as-is.

    return text.ToString();
}
/// <summary>
/// Reads a JSON string token by peeking at the character queue in chunks of up to
/// MAX_STRING_LOOKAHEAD_SIZE characters, decoding escapes as it goes, and skipping all
/// processed characters in a single SkipChars() call at the end.
/// </summary>
/// <remarks>
/// Original author's notes:
/// parsing fails if the longest string in the JSON is longer than
/// (CHARQUEUE_SIZE - READER_BUFF_SIZE), because Forward() will fail;
/// TBD: there might be bugs when dealing with short strings, or with \u escaped unicode
/// characters at the end of a JSON string.
/// When fetching a further chunk throws DotJsonMiniException, the text decoded so far is
/// discarded and the result of ReadString() is returned instead.
/// </remarks>
/// <returns>The unescaped string content without the surrounding double quotes.</returns>
/// <exception cref="DotJsonMiniException">
/// The next character is not a double quote, no characters can be peeked, a \u escape is
/// truncated or cannot be decoded, or a backslash is followed by a non-escapable character.
/// </exception>
private string ReadStringWithLookAhead()
{
    // The opening quote is mandatory here and is consumed right away.
    char opening = NextCharNoCheck();
    if (opening == 0 || opening != (char)CharSymbol.DQUOTE)
    {
        throw new DotJsonMiniException("Expecting String. Invalid token encountered: c = " + opening);
    }

    StringBuilder text = new StringBuilder();

    CyclicCharArray chunk = PeekCharsInQueue(MAX_STRING_LOOKAHEAD_SIZE);
    int chunkSize = (chunk == null) ? 0 : chunk.Length;
    if (chunkSize == 0)
    {
        throw new DotJsonMiniException("string token terminated unexpectedly.");
    }

    // A chunk shorter than requested is treated as the last one available.
    bool queueExhausted = chunkSize < MAX_STRING_LOOKAHEAD_SIZE;
    bool inEscape = false;
    bool refillRequested = false;
    int offset = 0;     // position within the current chunk
    int consumed = 0;   // characters accounted for in earlier chunks

    char current = chunk.GetChar(0);
    // The final character of a chunk is never processed inside the loop; it is either
    // re-read from the next chunk or inspected via PeekChar() after the loop.
    while (offset < chunkSize - 1
           && current != 0
           && (inEscape || current != (char)CharSymbol.DQUOTE))
    {
        ++offset;

        if (!inEscape)
        {
            if (current == (char)CharSymbol.BACKSLASH)
            {
                // Start of an escape sequence; the backslash itself is not emitted.
                inEscape = true;
            }
            else
            {
                // TBD (original note): control characters are not filtered out.
                text.Append(current);
            }
        }
        else
        {
            if (current == (char)CharSymbol.UNICODE_PREFIX)
            {
                if (offset < chunkSize - 4)
                {
                    // The four hex digits (plus one trailing character) are inside this chunk.
                    char[] hex = chunk.GetChars(offset, 4);
                    offset += 4;
                    try
                    {
                        char decoded = UnicodeUtil.GetUnicodeChar(hex);
                        if (decoded != 0)
                        {
                            text.Append(decoded);
                        }
                        // A decoded value of 0 is dropped without any error.
                    }
                    catch (Exception ex)
                    {
                        throw new DotJsonMiniException("Invalid unicode char: hex = " + String.Join<char>(",", hex), ex);
                    }
                }
                else if (!queueExhausted)
                {
                    // The escape straddles the chunk boundary: step back over "\u" so the
                    // next chunk starts at the backslash, and ask for a refill.
                    refillRequested = true;
                    offset -= 2;
                }
                else
                {
                    throw new DotJsonMiniException("Invalid unicode char.");
                }
            }
            else if (Symbols.IsEscapableChar(current))
            {
                // TBD (original note): newlines should not be allowed within a string.
                char unescaped = Symbols.GetEscapedChar(current);
                if (unescaped != 0)
                {
                    text.Append(unescaped);
                }
            }
            else
            {
                throw new DotJsonMiniException("Invalid escaped char: d = \\" + current);
            }

            // Leave escape mode (also when a refill was requested: the backslash is re-read).
            inEscape = false;
        }

        if (!queueExhausted && (refillRequested || offset >= chunkSize - 1))
        {
            // Peek the next chunk, starting right after everything processed so far.
            consumed += offset;
            offset = 0;
            refillRequested = false;
            try
            {
                chunk = PeekCharsInQueue(consumed, MAX_STRING_LOOKAHEAD_SIZE);
            }
            catch (DotJsonMiniException)
            {
                // Original note: the failure may be caused by a very long string, so retry
                // this one string with the non-look-ahead reader. Nothing has been skipped
                // yet apart from the opening quote. Beware of infinite loops here.
                System.Diagnostics.Debug.WriteLine("string token might have been too long. Trying again with no look-ahead ReadString().");
                return ReadString();
            }

            chunkSize = (chunk == null) ? 0 : chunk.Length;
            if (chunkSize == 0)
            {
                throw new DotJsonMiniException("string token terminated unexpectedly.");
            }
            if (chunkSize < MAX_STRING_LOOKAHEAD_SIZE)
            {
                queueExhausted = true;
            }
        }

        current = chunk.GetChar(offset);
    }

    // Only now are the peeked characters actually consumed from the stream.
    consumed += offset;
    SkipChars(consumed);

    if (PeekChar() == (char)CharSymbol.DQUOTE)
    {
        // Consume the closing quote without adding it to the result.
        SkipCharNoCheck();
    }
    // else: no closing quote follows; whatever was decoded is returned without an error.

    return text.ToString();
}
/// <summary>
/// Reads a JSON string token by peeking at the character queue in chunks of up to
/// MAX_STRING_LOOKAHEAD_SIZE characters, decoding escapes as it goes, and skipping all
/// processed characters in a single SkipChars() call at the end.
/// </summary>
/// <remarks>
/// Original author's notes:
/// parsing fails if the longest string in the JSON is longer than
/// (CHARQUEUE_SIZE - READER_BUFF_SIZE), because forward() will fail;
/// TBD: there might be bugs when dealing with short strings, or with \u escaped unicode
/// characters at the end of a JSON string.
/// When fetching a further chunk throws UnexpectedEndOfStreamException while look-ahead
/// parsing is on, DisableLookAheadParsing() is called, the text decoded so far is discarded
/// and the result of ReadString() is returned instead.
/// NOTE(review): chunks already pushed to tailBuffer before that fallback are not undone -
/// confirm whether ReadString() records the same characters again when tracing is enabled.
/// </remarks>
/// <returns>The unescaped string content without the surrounding double quotes.</returns>
/// <exception cref="UnexpectedSymbolException">
/// The next character is not a double quote, a \u escape is truncated or cannot be decoded,
/// or a backslash is followed by a non-escapable character.
/// </exception>
/// <exception cref="UnexpectedEndOfStreamException">
/// No characters can be peeked for the string body, or a chunk fetch fails while look-ahead
/// parsing is already disabled (rethrown unchanged).
/// </exception>
private string ReadStringWithLookAhead()
{
    // The opening quote is mandatory here and is consumed right away.
    char c = NextCharNoCheck();
    if (c == 0 || c != Symbols.DQUOTE)
    {
        throw new UnexpectedSymbolException("Expecting String. Invalid token encountered: c = " + c, GetTailCharStream(), PeekCharStream());
    }

    StringBuilder sb = new StringBuilder();
    bool escaped = false;

    int chunkLength;
    CyclicCharArray charArray = PeekCharsInQueue(MAX_STRING_LOOKAHEAD_SIZE);
    if (charArray == null || (chunkLength = charArray.Length) == 0)
    {
        throw new UnexpectedEndOfStreamException("String token terminated unexpectedly.", GetTailCharStream(), PeekCharStream());
    }

    // A chunk shorter than requested is treated as the last one available.
    bool noMoreCharsInQueue = chunkLength < MAX_STRING_LOOKAHEAD_SIZE;
    bool needMore = false;
    int chunkCounter = 0;           // position within the current chunk
    int totalLookAheadLength = 0;   // characters accounted for in earlier chunks

    char d = charArray.GetChar(0);
    // The final character of a chunk is never processed inside the loop; it is either
    // re-read from the next chunk or inspected via PeekChar() after the loop.
    while ((chunkCounter < chunkLength - 1)
           && d != 0
           && (escaped || d != Symbols.DQUOTE))
    {
        ++chunkCounter;

        if (!escaped && d == Symbols.BACKSLASH)
        {
            // Start of an escape sequence; the backslash itself is not emitted.
            escaped = true;
        }
        else if (escaped)
        {
            if (d == Symbols.UNICODE_PREFIX)
            {
                if (chunkCounter < chunkLength - 4)
                {
                    // The four hex digits (plus one trailing character) are inside this chunk.
                    char[] hex = charArray.GetChars(chunkCounter, 4);
                    chunkCounter += 4;
                    try
                    {
                        char u = UnicodeUtil.GetUnicodeChar(hex);
                        if (u != 0)
                        {
                            sb.Append(u);
                        }
                        // A decoded value of 0 is dropped without any error.
                    }
                    catch (Exception e)
                    {
                        throw new UnexpectedSymbolException("Invalid unicode char: hex = " + string.Join<char>(",", hex), e, GetTailCharStream(), PeekCharStream());
                    }
                }
                else if (!noMoreCharsInQueue)
                {
                    // The escape straddles the chunk boundary: step back over "\u" so the
                    // next chunk starts at the backslash, and ask for a refill.
                    needMore = true;
                    chunkCounter -= 2;
                }
                else
                {
                    throw new UnexpectedSymbolException("Invalid unicode char.", GetTailCharStream(), PeekCharStream());
                }
            }
            else if (Symbols.IsEscapableChar(d))
            {
                // TBD (original note): newlines should not be allowed within a string.
                char unescaped = Symbols.GetEscapedChar(d);
                if (unescaped != 0)
                {
                    sb.Append(unescaped);
                }
            }
            else
            {
                throw new UnexpectedSymbolException("Invalid escaped char: d = \\" + d, GetTailCharStream(), PeekCharStream());
            }

            // Leave escape mode (also when a refill was requested: the backslash is re-read).
            escaped = false;
        }
        else
        {
            // TBD (original note): control characters are not filtered out.
            sb.Append(d);
        }

        if (!noMoreCharsInQueue && (needMore || chunkCounter >= chunkLength - 1))
        {
            // Peek the next chunk, starting right after everything processed so far.
            totalLookAheadLength += chunkCounter;
            if (tracingEnabled)
            {
                this.tailBuffer.Push(charArray.GetArray(), chunkCounter);
            }
            chunkCounter = 0;
            needMore = false;
            try
            {
                charArray = PeekCharsInQueue(totalLookAheadLength, MAX_STRING_LOOKAHEAD_SIZE);
            }
            catch (UnexpectedEndOfStreamException)
            {
                // Original notes: the failure may be caused by a very long string, so retry
                // with the non-look-ahead reader. This makes it hard to reuse the parser
                // instance, which may be acceptable because JSON files in a given context
                // tend to be similar. Beware of infinite loops here.
                if (LookAheadParsing)
                {
                    DisableLookAheadParsing();
                    // Nothing has been skipped yet apart from the opening quote.
                    return ReadString();
                }

                // Not expected to happen. Rethrow with "throw;" so the original stack trace
                // is preserved ("throw e;" would reset it to this frame).
                throw;
            }

            if (charArray == null || (chunkLength = charArray.Length) == 0)
            {
                throw new UnexpectedEndOfStreamException("String token terminated unexpectedly.", GetTailCharStream(), PeekCharStream());
            }
            if (chunkLength < MAX_STRING_LOOKAHEAD_SIZE)
            {
                noMoreCharsInQueue = true;
            }
        }

        d = charArray.GetChar(chunkCounter);
    }

    // Only now are the peeked characters actually consumed from the stream.
    totalLookAheadLength += chunkCounter;
    if (tracingEnabled)
    {
        this.tailBuffer.Push(charArray.GetArray(), chunkCounter);
    }
    SkipChars(totalLookAheadLength);

    d = PeekChar();
    if (d == Symbols.DQUOTE)
    {
        // Consume the closing quote without adding it to the result.
        SkipCharNoCheck();
    }
    // else: no closing quote follows; whatever was decoded is returned without an error.

    return sb.ToString();
}