Example #1
0
        /// <summary>
        /// Scans a literal string, contained between "(" and ")".
        /// </summary>
        /// <remarks>
        /// Phase 1 copies the characters of the string into the token buffer, keeping balanced
        /// nested parentheses and resolving backslash escape sequences (named escapes, octal
        /// character codes and line continuations).
        /// Phase 2 checks the collected characters for a leading byte order mark (FE FF or FF FE)
        /// and, if one is present, combines the remaining characters pairwise into UTF-16 code units.
        /// </remarks>
        /// <returns>
        /// <c>Symbol.UnicodeString</c> if the string starts with a UTF-16 byte order mark,
        /// otherwise <c>Symbol.String</c>. The same value is stored in <c>_symbol</c> and the
        /// decoded text is stored in <c>_token</c>.
        /// </returns>
        public Symbol ScanLiteralString()
        {
            // Reference: 3.2.3  String Objects / Page 53
            // Reference: TABLE 3.32  String Types / Page 157

            Debug.Assert(_currChar == Chars.ParenLeft);
            _token = new StringBuilder();
            int parenLevel = 0;
            char ch = ScanNextChar(false);

            // Phase 1: deal with escape characters.
            while (ch != Chars.EOF)
            {
                // An unescaped ")" at nesting level 0 terminates the string. It is consumed
                // but not added to the token.
                if (ch == ')' && parenLevel == 0)
                {
                    ScanNextChar(false);
                    break;
                }

                switch (ch)
                {
                    case '(':
                        parenLevel++;
                        break;

                    case ')':
                        // Closes a nested, balanced parenthesis; level 0 was handled above.
                        parenLevel--;
                        break;

                    case '\\':
                        ch = ScanNextChar(false);
                        switch (ch)
                        {
                            case 'n':
                                ch = Chars.LF;
                                break;

                            case 'r':
                                ch = Chars.CR;
                                break;

                            case 't':
                                ch = Chars.HT;
                                break;

                            case 'b':
                                ch = Chars.BS;
                                break;

                            case 'f':
                                ch = Chars.FF;
                                break;

                            case '(':
                                ch = Chars.ParenLeft;
                                break;

                            case ')':
                                ch = Chars.ParenRight;
                                break;

                            case '\\':
                                ch = Chars.BackSlash;
                                break;

                            // AutoCAD PDFs may contain such strings: (\ )
                            case ' ':
                                break;

                            case Chars.CR:
                            case Chars.LF:
                            {
                                // Line continuation: the backslash and the end-of-line marker that
                                // follows it are not part of the string. The marker may be CR, LF
                                // or the pair CR LF, so a LF directly after a CR is skipped as well.
                                bool startedWithCR = ch == Chars.CR;
                                ch = ScanNextChar(false);
                                if (startedWithCR && ch == Chars.LF)
                                {
                                    ch = ScanNextChar(false);
                                }
                                // Re-examine the character after the line break from the loop top;
                                // nothing is appended for the continuation itself.
                                continue;
                            }

                            default:
                                if (char.IsDigit(ch))              // First octal character.
                                {
                                    // Octal character code with one to three digits.
                                    if (ch >= '8')
                                    {
                                        ParserDiagnostics.HandleUnexpectedCharacter(ch);
                                    }

                                    int n = ch - '0';
                                    if (char.IsDigit(_nextChar))   // Second octal character.
                                    {
                                        ch = ScanNextChar(false);
                                        if (ch >= '8')
                                        {
                                            ParserDiagnostics.HandleUnexpectedCharacter(ch);
                                        }

                                        n = n * 8 + ch - '0';
                                        if (char.IsDigit(_nextChar)) // Third octal character.
                                        {
                                            ch = ScanNextChar(false);
                                            if (ch >= '8')
                                            {
                                                ParserDiagnostics.HandleUnexpectedCharacter(ch);
                                            }

                                            n = n * 8 + ch - '0';
                                        }
                                    }

                                    // A three-digit code can exceed one byte (e.g. \777); the PDF
                                    // specification says high-order overflow is ignored.
                                    ch = (char)(n & 0xFF);
                                }
                                else
                                {
                                    // TODO: Unknown escape character. It is reported and then
                                    // appended to the token as it is.
                                    ParserDiagnostics.HandleUnexpectedCharacter(ch);
                                }
                                break;
                        }
                        break;

                    default:
                        break;
                }

                _token.Append(ch);
                ch = ScanNextChar(false);
            }

            // Phase 2: deal with UTF-16 if necessary.
            // UTF-16BE Unicode strings start with U+FEFF ("þÿ"). There can be empty strings with UTF-16BE prefix.
            // Adobe Reader also supports UTF-16LE, i.e. strings starting with "ÿþ".
            if (_token.Length >= 2)
            {
                bool bigEndian = _token[0] == '\xFE' && _token[1] == '\xFF';
                bool littleEndian = _token[0] == '\xFF' && _token[1] == '\xFE';
                if (bigEndian || littleEndian)
                {
                    _token = CombineUtf16BytePairs(_token, bigEndian);
                    return _symbol = Symbol.UnicodeString;
                }
            }
            return _symbol = Symbol.String;
        }

        /// <summary>
        /// Combines each pair of byte-valued characters that follows the two-character byte order
        /// mark into one UTF-16 code unit.
        /// </summary>
        /// <param name="bytes">
        /// The scanned characters including the leading byte order mark. If the length is odd,
        /// a NUL character is appended to this builder to complete the last pair.
        /// </param>
        /// <param name="bigEndian">
        /// <c>true</c> if the first character of each pair is the high-order byte;
        /// <c>false</c> if it is the low-order byte.
        /// </param>
        /// <returns>A new builder holding the combined characters, without the byte order mark.</returns>
        private static StringBuilder CombineUtf16BytePairs(StringBuilder bytes, bool bigEndian)
        {
            int length = bytes.Length;
            if ((length & 1) == 1)
            {
                // TODO What does the PDF Reference say about this case? Assume (char)0 or treat the file as corrupted?
                // Append the NUL character explicitly: Append(0) would pick the Int32 overload
                // and add the digit '0' (U+0030) instead.
                bytes.Append('\0');
                ++length;
                DebugBreak.Break();
            }

            StringBuilder combined = new StringBuilder();
            for (int i = 2; i < length; i += 2)
            {
                int high = bigEndian ? bytes[i] : bytes[i + 1];
                int low = bigEndian ? bytes[i + 1] : bytes[i];
                combined.Append((char)(256 * high + low));
            }
            return combined;
        }