private string ReadString()
        {
            // Reads one JSON string token character by character and returns its
            // unescaped content, without the surrounding double quotes.
            //
            // The opening quote is optional at this point: ReadStringWithLookAhead()
            // may already have consumed it before falling back to this method, so a
            // leading quote is skipped only if it is actually the next character.
            //
            // Throws DotJsonMiniException when PeekChar() yields 0 at the very start,
            // when a backslash is followed by a character that is not escapable, or
            // when decoding a \u escape fails. If the input yields 0 before a closing
            // quote is seen, no error is raised and the text collected so far is returned.
            StringBuilder text = new StringBuilder();

            char first = PeekChar();
            if (first == 0)
            {
                // Not expected to happen.
                throw new DotJsonMiniException("Expecting String. Invalid token encountered: c = " + first);
            }
            if (first == (char)CharSymbol.DQUOTE)
            {
                // Drop the opening quote; it is not part of the value.
                SkipCharNoCheck();
            }

            // True while the previous character was an unescaped backslash.
            bool inEscape = false;
            char current  = PeekChar();

            while (current != 0 && (inEscape || current != (char)CharSymbol.DQUOTE))
            {
                current = NextCharNoCheck();

                if (inEscape)
                {
                    // The escape is finished after this character, whatever it turns out to be.
                    inEscape = false;

                    if (current == (char)CharSymbol.UNICODE_PREFIX)
                    {
                        // "\u" is followed by four characters that encode the code unit.
                        CyclicCharArray hexDigits = NextCharsInQueue(4);
                        try
                        {
                            char decoded = UnicodeUtil.GetUnicodeChar(hexDigits);
                            if (decoded != 0)
                            {
                                text.Append(decoded);
                            }
                            // A decoded value of 0 is dropped silently.
                        }
                        catch (Exception ex)
                        {
                            throw new DotJsonMiniException("Invalid unicode char: hex = " + hexDigits.ToString(), ex);
                        }
                    }
                    else if (!Symbols.IsEscapableChar(current))
                    {
                        throw new DotJsonMiniException("Invalid escaped char: d = \\" + current);
                    }
                    else
                    {
                        // TBD (from the original author): a newline should not be allowed within a string.
                        char unescaped = Symbols.GetEscapedChar(current);
                        if (unescaped != 0)
                        {
                            text.Append(unescaped);
                        }
                    }
                }
                else if (current == (char)CharSymbol.BACKSLASH)
                {
                    // Start of an escape sequence; the backslash itself is not emitted.
                    inEscape = true;
                }
                else
                {
                    // Ordinary character. TBD (from the original author): control characters are not excluded.
                    text.Append(current);
                }

                current = PeekChar();
            }

            if (current == (char)CharSymbol.DQUOTE)
            {
                // Consume the closing quote; it is not part of the value.
                SkipCharNoCheck();
            }
            // Otherwise the input ended before the closing quote; this is not treated as an error here.

            return text.ToString();
        }
        // Note:
        // Parsing will fail
        //     if the longest string in the JSON is longer than (CHARQUEUE_SIZE - READER_BUFF_SIZE),
        //     because Forward() will fail.
        // TBD:
        // There might be bugs when dealing with short strings, or with \\u-escaped unicode characters at the end of a JSON string.
        // ...
        private string ReadStringWithLookAhead()
        {
            // Reads one JSON string token by peeking at the input in chunks of up to
            // MAX_STRING_LOOKAHEAD_SIZE characters, and returns the unescaped content
            // without the surrounding double quotes.
            //
            // Throws DotJsonMiniException when the next character is not a double quote,
            // when a peeked chunk is null or empty, when an escape sequence is invalid, or
            // when a \u escape is cut off and no more characters are available.
            // If peeking a further chunk throws DotJsonMiniException, the method falls back
            // to the non-look-ahead ReadString().

            // Consume the opening quote.
            char c = NextCharNoCheck();

            if (c == 0 || c != (char)CharSymbol.DQUOTE)
            {
                // This cannot happen.
                throw new DotJsonMiniException("Expecting String. Invalid token encountered: c = " + c);
            }
            StringBuilder sb = new StringBuilder();
            // The leading quote is not appended to the result.

            // True while the previously processed character was an unescaped backslash.
            bool escaped = false;


            // First look-ahead chunk. NOTE(review): presumably PeekCharsInQueue does not consume
            // the characters, since they are skipped explicitly after the loop - confirm.
            int             chunkLength;
            CyclicCharArray charArray = PeekCharsInQueue(MAX_STRING_LOOKAHEAD_SIZE);

            if (charArray == null || (chunkLength = charArray.Length) == 0)
            {
                throw new DotJsonMiniException("string token terminated unexpectedly.");
            }
            // A chunk shorter than requested is treated as "nothing more can be peeked".
            bool noMoreCharsInQueue = false;

            if (chunkLength < MAX_STRING_LOOKAHEAD_SIZE)
            {
                noMoreCharsInQueue = true;
            }
            bool needMore             = false;   // set when a "\u" escape straddles the end of the current chunk.
            int  chunkCounter         = 0;       // index of the next character to examine within the current chunk.
            int  totalLookAheadLength = 0;       // number of characters processed in all previous chunks.
            char d = charArray.GetChar(0);

            // Process d, then advance. The loop ends at an unescaped closing quote, at a 0 character,
            // or when only the last character of the chunk is left; that last character is not
            // processed here but is re-read as the first character of the next chunk (if any).
            while ((chunkCounter < chunkLength - 1) &&
                   d != 0 &&
                   (escaped == true || d != (char)CharSymbol.DQUOTE))
            {
                // From here on chunkCounter points at the character after d.
                ++chunkCounter;

                if (escaped == false && d == (char)CharSymbol.BACKSLASH)
                {
                    escaped = true;
                    // The backslash itself is not appended.
                }
                else
                {
                    if (escaped == true)
                    {
                        if (d == (char)CharSymbol.UNICODE_PREFIX)
                        {
                            // The four hex characters must lie within the current chunk, leaving
                            // at least one character after them for the read at the bottom of the loop.
                            if (chunkCounter < chunkLength - 4)
                            {
                                char[] hex = charArray.GetChars(chunkCounter, 4);
                                chunkCounter += 4;

                                try {
                                    char u = UnicodeUtil.GetUnicodeChar(hex);
                                    if (u != 0)
                                    {
                                        sb.Append(u);
                                    }
                                    else
                                    {
                                        // A decoded value of 0 is dropped silently.
                                    }
                                } catch (Exception e) {
                                    throw new DotJsonMiniException("Invalid unicode char: hex = " + String.Join <char>(",", hex), e);
                                }
                            }
                            else
                            {
                                if (noMoreCharsInQueue == false)
                                {
                                    // Not enough characters left in this chunk: request another chunk and
                                    // step back over "\u" so the whole escape is re-read from the new chunk.
                                    needMore      = true;
                                    chunkCounter -= 2;
                                }
                                else
                                {
                                    // The escape is cut off and there is nothing more to peek.
                                    throw new DotJsonMiniException("Invalid unicode char.");
                                }
                            }
                        }
                        else
                        {
                            if (Symbols.IsEscapableChar(d))
                            {
                                // TBD:
                                // Newline cannot be allowed within a string....
                                char e = Symbols.GetEscapedChar(d);
                                if (e != 0)
                                {
                                    sb.Append(e);
                                }
                                else
                                {
                                    // This cannot happen.
                                }
                            }
                            else
                            {
                                throw new DotJsonMiniException("Invalid escaped char: d = \\" + d);
                            }
                        }
                        // The escape sequence is finished (or will be re-read from the next chunk).
                        escaped = false;
                    }
                    else
                    {
                        // TBD:
                        // Exclude control characters ???

                        sb.Append(d);
                    }
                }

                // Fetch the next chunk when more input may exist and either a "\u" escape was cut off
                // or only the last character of the current chunk remains.
                if ((noMoreCharsInQueue == false) && (needMore || chunkCounter >= chunkLength - 1))
                {
                    totalLookAheadLength += chunkCounter;
                    chunkCounter          = 0;              // restart at the beginning of the new chunk.
                    needMore              = false;

                    try {
                        // Peek the next chunk, starting after everything processed so far.
                        charArray = PeekCharsInQueue(totalLookAheadLength, MAX_STRING_LOOKAHEAD_SIZE);
                    } catch (DotJsonMiniException e) {
                        // Not sure if this makes sense, but the error may be due to a very long string,
                        // so retry without look-ahead. This applies to this particular string only; the
                        // fallback may be taken again for the next long string.
                        // We should be careful not to get into an infinite loop.
                        System.Diagnostics.Debug.WriteLine("string token might have been too long. Trying again with no look-ahead ReadString().");

                        // The text accumulated in sb is discarded. Nothing has been skipped yet apart from
                        // the opening quote, so ReadString() (which tolerates a missing opening quote)
                        // re-reads the content - assuming PeekCharsInQueue did not consume anything; TODO confirm.
                        return(ReadString());
                    }
                    if (charArray == null || (chunkLength = charArray.Length) == 0)
                    {
                        throw new DotJsonMiniException("string token terminated unexpectedly.");
                    }
                    if (chunkLength < MAX_STRING_LOOKAHEAD_SIZE)
                    {
                        noMoreCharsInQueue = true;
                    }
                }

                d = charArray.GetChar(chunkCounter);
            }
            // Consume everything that was processed via look-ahead.
            totalLookAheadLength += chunkCounter;
            SkipChars(totalLookAheadLength);
            d = PeekChar();

            if (d == (char)CharSymbol.DQUOTE)
            {
                // Consume the closing quote; it is not appended to the result.
                SkipCharNoCheck();
            }
            else
            {
                // No closing quote at this position (e.g. end of the json input).
                // This is not reported as an error; the collected text is returned as is.
            }

            return(sb.ToString());
        }
Example #3
0
        // Note:
        // Parsing will fail
        //     if the longest string in the JSON is longer than (CHARQUEUE_SIZE - READER_BUFF_SIZE),
        //     because forward() will fail.
        // TBD:
        // There might be bugs when dealing with short strings, or with \\u-escaped unicode characters at the end of a JSON string.
        private string ReadStringWithLookAhead()
        {
            // Reads one JSON string token by peeking at the input in chunks of up to
            // MAX_STRING_LOOKAHEAD_SIZE characters, and returns the unescaped content
            // without the surrounding double quotes.
            //
            // Throws UnexpectedSymbolException when the next character is not a double quote,
            // when an escape sequence is invalid, or when a \u escape is cut off and no more
            // characters are available. Throws UnexpectedEndOfStreamException when a peeked
            // chunk is null or empty.
            // If peeking a further chunk throws UnexpectedEndOfStreamException, look-ahead
            // parsing is disabled and the method falls back to ReadString().

            // Consume the opening quote.
            char c = NextCharNoCheck();

            if (c == 0 || c != Symbols.DQUOTE)
            {
                // This cannot happen.
                throw new UnexpectedSymbolException("Expecting String. Invalid token encountered: c = " + c, GetTailCharStream(), PeekCharStream());
            }
            StringBuilder sb = new StringBuilder();
            // The leading quote is not appended to the result.

            // True while the previously processed character was an unescaped backslash.
            bool            escaped = false;
            int             chunkLength;
            // First look-ahead chunk. NOTE(review): presumably PeekCharsInQueue does not consume
            // the characters, since they are skipped explicitly after the loop - confirm.
            CyclicCharArray charArray = PeekCharsInQueue(MAX_STRING_LOOKAHEAD_SIZE);

            if (charArray == null || (chunkLength = charArray.Length) == 0)
            {
                throw new UnexpectedEndOfStreamException("String token terminated unexpectedly.", GetTailCharStream(), PeekCharStream());
            }
            // A chunk shorter than requested is treated as "nothing more can be peeked".
            bool noMoreCharsInQueue = false;

            if (chunkLength < MAX_STRING_LOOKAHEAD_SIZE)
            {
                noMoreCharsInQueue = true;
            }
            bool needMore             = false;   // set when a "\u" escape straddles the end of the current chunk.
            int  chunkCounter         = 0;       // index of the next character to examine within the current chunk.
            int  totalLookAheadLength = 0;       // number of characters processed in all previous chunks.
            char d = charArray.GetChar(0);

            // Process d, then advance. The loop ends at an unescaped closing quote, at a 0 character,
            // or when only the last character of the chunk is left; that last character is not
            // processed here but is re-read as the first character of the next chunk (if any).
            while ((chunkCounter < chunkLength - 1) && d != 0 && (escaped == true || d != Symbols.DQUOTE))
            {
                // From here on chunkCounter points at the character after d.
                ++chunkCounter;
                if (escaped == false && d == Symbols.BACKSLASH)
                {
                    escaped = true;
                    // The backslash itself is not appended.
                }
                else
                {
                    if (escaped == true)
                    {
                        if (d == Symbols.UNICODE_PREFIX)
                        {
                            // The four hex characters must lie within the current chunk, leaving
                            // at least one character after them for the read at the bottom of the loop.
                            if (chunkCounter < chunkLength - 4)
                            {
                                char[] hex = charArray.GetChars(chunkCounter, 4);
                                chunkCounter += 4;
                                try {
                                    char u = UnicodeUtil.GetUnicodeChar(hex);
                                    if (u != 0)
                                    {
                                        sb.Append(u);
                                    }
                                    else
                                    {
                                        // A decoded value of 0 is dropped silently.
                                    }
                                } catch (Exception e) {
                                    throw new UnexpectedSymbolException("Invalid unicode char: hex = " + string.Join <char>(",", hex), e, GetTailCharStream(), PeekCharStream());
                                }
                            }
                            else
                            {
                                if (noMoreCharsInQueue == false)
                                {
                                    // Not enough characters left in this chunk: request another chunk and
                                    // step back over "\u" so the whole escape is re-read from the new chunk.
                                    needMore      = true;
                                    chunkCounter -= 2;
                                }
                                else
                                {
                                    // The escape is cut off and there is nothing more to peek.
                                    throw new UnexpectedSymbolException("Invalid unicode char.", GetTailCharStream(), PeekCharStream());
                                }
                            }
                        }
                        else
                        {
                            if (Symbols.IsEscapableChar(d))
                            {
                                // TBD:
                                // Newline cannot be allowed within a string....
                                char e = Symbols.GetEscapedChar(d);
                                if (e != 0)
                                {
                                    sb.Append(e);
                                }
                                else
                                {
                                    // This cannot happen.
                                }
                            }
                            else
                            {
                                throw new UnexpectedSymbolException("Invalid escaped char: d = \\" + d, GetTailCharStream(), PeekCharStream());
                            }
                        }
                        // The escape sequence is finished (or will be re-read from the next chunk).
                        escaped = false;
                    }
                    else
                    {
                        // TBD:
                        // Exclude control characters ???
                        sb.Append(d);
                    }
                }
                // Fetch the next chunk when more input may exist and either a "\u" escape was cut off
                // or only the last character of the current chunk remains.
                if ((noMoreCharsInQueue == false) && (needMore || chunkCounter >= chunkLength - 1))
                {
                    totalLookAheadLength += chunkCounter;
                    if (tracingEnabled)
                    {
                        // Hand the processed part of this chunk to the tail buffer used for tracing.
                        this.tailBuffer.Push(charArray.GetArray(), chunkCounter);
                    }
                    chunkCounter = 0; // restart at the beginning of the new chunk.
                    needMore     = false;
                    try {
                        // Peek the next chunk, starting after everything processed so far.
                        charArray = PeekCharsInQueue(totalLookAheadLength, MAX_STRING_LOOKAHEAD_SIZE);
                    } catch (UnexpectedEndOfStreamException) {
                        // Not sure if this makes sense, but the error may be due to a very long string,
                        // so retry without look-ahead.
                        // Note that this makes it hard to reuse the parser instance....
                        // (in some way, it's a good thing, because the json files tend to be similar in the given context,
                        //     and if one file has a very long string, then it's likely that others have long strings as well....)
                        // We should be careful not to get into an infinite loop.
                        if (LookAheadParsing)   // This if() is always true at this point...
                        {
                            DisableLookAheadParsing();
                            // The text accumulated in sb is discarded. Nothing has been skipped yet apart from
                            // the opening quote, so ReadString() re-reads the content - assuming
                            // PeekCharsInQueue did not consume anything; TODO confirm.
                            return(ReadString());
                        }
                        else                      // This cannot happen..
                        {
                            // Rethrow with "throw;" so the original stack trace is preserved
                            // ("throw e;" would reset it to this line).
                            throw;
                        }
                    }
                    if (charArray == null || (chunkLength = charArray.Length) == 0)
                    {
                        throw new UnexpectedEndOfStreamException("String token terminated unexpectedly.", GetTailCharStream(), PeekCharStream());
                    }
                    if (chunkLength < MAX_STRING_LOOKAHEAD_SIZE)
                    {
                        noMoreCharsInQueue = true;
                    }
                }
                d = charArray.GetChar(chunkCounter);
            }
            // Consume everything that was processed via look-ahead.
            totalLookAheadLength += chunkCounter;
            if (tracingEnabled)
            {
                // Hand the processed part of the final chunk to the tail buffer used for tracing.
                this.tailBuffer.Push(charArray.GetArray(), chunkCounter);
            }
            SkipChars(totalLookAheadLength);
            d = PeekChar();

            if (d == Symbols.DQUOTE)
            {
                // Consume the closing quote; it is not appended to the result.
                SkipCharNoCheck();
            }
            else
            {
                // No closing quote at this position (e.g. end of the json input).
                // This is not reported as an error; the collected text is returned as is.
            }
            return(sb.ToString());
        }