Exemplo n.º 1
0
        /**
         * <summary>Parses an inline image: its header entries (up to the image-data keyword),
         * followed by its raw data (up to the 'EI' end-image operator).</summary>
         * <remarks>The data section is scanned one byte at a time, so the 'EI' operator is
         * recognized at any byte offset. If the stream ends before 'EI' is found, all the bytes
         * read so far are kept as image data.</remarks>
         * <returns>The parsed inline image.</returns>
         */
        private InlineImage ParseInlineImage()
        {
            /*
             * NOTE: Inline images use a peculiar syntax that's an exception to the usual rule
             * that the data in a content stream is interpreted according to the standard PDF syntax
             * for objects.
             */
            InlineImageHeader header;
            {
                List <PdfDirectObject> operands = new List <PdfDirectObject>();
                // Parsing the image entries...
                while (MoveNext() && TokenType != TokenTypeEnum.Keyword) // Not keyword (i.e. end at image data beginning (ID operator)).
                {
                    operands.Add((PdfDirectObject)ParsePdfObject());
                }
                header = new InlineImageHeader(operands);
            }

            InlineImageBody body;

            {
                // [FIX:51,74] Wrong 'EI' token handling on inline image parsing.
                bytes::IInputStream stream = Stream;
                stream.ReadByte(); // Should be the whitespace following the 'ID' token.
                bytes::Buffer data = new bytes::Buffer();
                /*
                 * NOTE: An 'E' byte is held back (instead of being appended immediately) until the
                 * following byte tells whether it opens the 'EI' operator or is plain image data.
                 * Bytes preceding 'EI' (including any separating whitespace) are kept as data.
                 */
                bool pendingE = false;
                while (true)
                {
                    int curByte = stream.ReadByte();
                    if (curByte == -1)
                    {
                        // End of stream without 'EI': don't lose the byte held back.
                        if (pendingE)
                        {
                            data.Append((byte)'E');
                        }
                        break;
                    }

                    if (pendingE)
                    {
                        if (curByte == 'I')
                        {
                            break; // 'EI' operator fully consumed.
                        }

                        // The held 'E' was ordinary data after all.
                        data.Append((byte)'E');
                        pendingE = false;
                    }

                    if (curByte == 'E')
                    {
                        pendingE = true;
                    }
                    else
                    {
                        data.Append((byte)curByte);
                    }
                }
                body = new InlineImageBody(data);
            }

            return(new InlineImage(header, body));
        }
Exemplo n.º 2
0
        /**
         * <summary>Loads a format-0 cmap subtable (byte encoding table, i.e. Apple standard
         * character-to-glyph index mapping table).</summary>
         * <remarks>Marks the font as symbolic and rebuilds the glyph index map with one entry
         * for each of the 256 single-byte character codes, read sequentially from the font data.
         * </remarks>
         */
        private void LoadCMapFormat0(
            )
        {
            /*
             * NOTE: Format 0 is a plain 1-to-1 table: the n-th byte of the mapping array is the
             * glyph index of character code n, hence at most 256 entries.
             */
            Symbolic     = true;
            GlyphIndexes = new Dictionary <int, int>(256);

            // Jump over the 4 bytes that precede the mapping array
            // (presumably the remaining subtable header fields -- TODO confirm against the caller).
            FontData.Skip(4);

            // Character codes are implicit: they're the positions within the glyph index array.
            int code = 0;
            while (code < 256)
            {
                GlyphIndexes[code] = FontData.ReadByte();
                code++;
            }
        }
Exemplo n.º 3
0
 /**
  * <summary>Reads the next byte.</summary>
  * <returns>The value returned by the underlying stream's ReadByte(), or -1 if
  * EnsureStream() signalled the end of the data through an EndOfStreamException.</returns>
  */
 public int ReadByte(
     )
 {
     //TODO:harmonize with other Read*() method EOF exceptions!!!
     bool available;
     try
     {
         EnsureStream();
         available = true;
     }
     catch (EndOfStreamException)
     {
         // End of data is reported through the return value rather than by exception.
         available = false;
     }

     // NOTE: The actual read stays outside the try block, so that only EnsureStream() failures
     // are mapped to -1.
     return(available ? stream.ReadByte() : -1);
 }
Exemplo n.º 4
0
 /**
  * <summary>Reads the next byte, moving on to the next stream when the current one is
  * missing or fully consumed.</summary>
  * <returns>The value returned by the current stream's ReadByte(), or -1 if no further
  * stream is available.</returns>
  */
 public int ReadByte(
     )
 {
     // Is the current stream missing or already read to its end?
     bool exhausted = (stream == null ||
                       stream.Position >= stream.Length);

     // MoveNextStream() is attempted only when the current stream can't supply any more bytes.
     if (exhausted && !MoveNextStream())
     {
         return(-1); //TODO:harmonize with other Read*() method EOF exceptions!!!
     }

     return(stream.ReadByte());
 }
Exemplo n.º 5
0
        /**
         * <summary>Parses an inline image: its header entries (up to the image-data keyword),
         * followed by its raw data (up to the 'EI' end-image operator).</summary>
         * <remarks>If the stream ends before 'EI' is found, all the bytes read so far are kept
         * as image data. Neither byte of the 'EI' operator is included in the image data.</remarks>
         * <returns>The parsed inline image.</returns>
         */
        private InlineImage ParseInlineImage(
            )
        {
            /*
             * NOTE: Inline images use a peculiar syntax that's an exception to the usual rule
             * that the data in a content stream is interpreted according to the standard PDF syntax
             * for objects.
             */
            InlineImageHeader header;
            {
                List <PdfDirectObject> operands = new List <PdfDirectObject>();
                // Parsing the image entries...
                while (MoveNext() &&
                       TokenType != TokenTypeEnum.Keyword) // Not keyword (i.e. end at image data beginning (ID operator)).
                {
                    operands.Add((PdfDirectObject)ParsePdfObject());
                }
                header = new InlineImageHeader(operands);
            }

            InlineImageBody body;

            {
                bytes::IInputStream stream = Stream;
                /*
                 * NOTE: Only the single byte that separates the 'ID' keyword from the data is
                 * skipped. Image data is binary, so it must NOT be run through the tokenizer
                 * (which would discard leading whitespace-valued bytes and a whole pseudo-token).
                 */
                stream.ReadByte(); // Should be the whitespace following the 'ID' token.
                bytes::Buffer data = new bytes::Buffer();
                /*
                 * NOTE: An 'E' byte is held back (instead of being appended immediately) until the
                 * following byte tells whether it opens the 'EI' operator or is plain image data.
                 */
                bool pendingE = false;
                while (true)
                {
                    // Kept as int: casting to byte would turn the end-of-stream marker (-1) into
                    // 255, making the end of the stream undetectable.
                    int curByte = stream.ReadByte();
                    if (curByte == -1)
                    {
                        // End of stream without 'EI': don't lose the byte held back.
                        if (pendingE)
                        {
                            data.Append((byte)'E');
                        }
                        break;
                    }

                    if (pendingE)
                    {
                        if (curByte == 'I')
                        {
                            break; // 'EI' operator fully consumed.
                        }

                        // The held 'E' was ordinary data after all.
                        data.Append((byte)'E');
                        pendingE = false;
                    }

                    if (curByte == 'E')
                    {
                        pendingE = true;
                    }
                    else
                    {
                        data.Append((byte)curByte);
                    }
                }
                body = new InlineImageBody(data);
            }

            return(new InlineImage(
                       header,
                       body
                       ));
        }
Exemplo n.º 6
0
        /**
         * <summary>Parse the next token [PDF:1.6:3.1].</summary>
         * <remarks>
         *  Contract:
         *  <list type="bullet">
         *    <item>Preconditions:
         *      <list type="number">
         *        <item>To properly parse the current token, the pointer MUST be just before its starting (leading whitespaces are ignored).</item>
         *      </list>
         *    </item>
         *    <item>Postconditions:
         *      <list type="number">
         *        <item id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</item>
         *      </list>
         *    </item>
         *    <item>Invariants:
         *      <list type="number">
         *        <item>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</item>
         *      </list>
         *    </item>
         *    <item>Side-effects:
         *      <list type="number">
         *        <item>See <see href="#moveNext_contract_post[0]">Postconditions</see>.</item>
         *      </list>
         *    </item>
         *  </list>
         *  Name, number and keyword tokens are delimited by reading one byte past their end; that
         *  look-ahead byte is pushed back only if it was actually read (i.e. not on end of stream).
         * </remarks>
         * <returns>Whether a new token was found.</returns>
         * <exception cref="FileFormatException">Thrown on malformed name escapes, isolated or
         * unterminated angle-bracket constructs and unterminated literal strings.</exception>
         */
        public bool MoveNext(
            )
        {
            if (stream == null)
            {
                return(false);
            }

            /*
             * NOTE: It'd be interesting to evaluate an alternative regular-expression-based
             * implementation...
             */
            int c = 0;

            // Skip white-space characters [PDF:1.6:3.1.1].
            while (true)
            {
                c = stream.ReadByte();
                if (c == -1)
                {
                    /* NOTE: Current stream has finished. */
                    // Move to the next stream!
                    MoveNextStream();
                    // No more streams?
                    if (stream == null)
                    {
                        return(false);
                    }
                }
                else if (!IsWhitespace(c)) // Stop at the first non-white-space character.
                {
                    break;
                }
            }

            // Textual content of the token (left null for the purely-structural tokens).
            StringBuilder buffer = null;

            token = null;
            // Which character is it?
            switch (c)
            {
            case '/': // Name.
                tokenType = TokenTypeEnum.Name;

                buffer = new StringBuilder();
                while ((c = stream.ReadByte()) != -1)
                {
                    if (IsDelimiter(c) || IsWhitespace(c))
                    {
                        break;
                    }
                    // Is it an hexadecimal code [PDF:1.6:3.2.4]?
                    if (c == '#')
                    {
                        try
                        { c = (GetHex(stream.ReadByte()) << 4) + GetHex(stream.ReadByte()); }
                        catch
                        { throw new FileFormatException("Unexpected EOF (malformed hexadecimal code in name object).", stream.Position); }
                    }

                    buffer.Append((char)c);
                }
                // Recover the first byte after the current token (none was read on end of stream:
                // stepping back then would re-expose the token's own last byte).
                if (c != -1)
                {
                    stream.Skip(-1);
                }
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.':
            case '-':
            case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
                switch (c)
                {
                case '.': // Decimal point.
                    tokenType = TokenTypeEnum.Real;
                    break;

                default:                               // Digit or signum.
                    tokenType = TokenTypeEnum.Integer; // By default (it may be real).
                    break;
                }

                // Building the number...
                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }

                    if (c == '.')
                    {
                        // A decimal point promotes the number to real.
                        tokenType = TokenTypeEnum.Real;
                    }
                    else if (c < '0' || c > '9')
                    {
                        break;
                    }
                } while(true);

                // Recover the first byte after the current token (none was read on end of stream:
                // stepping back then would re-expose the token's own last byte).
                if (c != -1)
                {
                    stream.Skip(-1);
                }

                break;

            case '[': // Array (begin).
                tokenType = TokenTypeEnum.ArrayBegin;

                break;

            case ']': // Array (end).
                tokenType = TokenTypeEnum.ArrayEnd;

                break;

            case '<': // Dictionary (begin) | Hexadecimal string.
                c = stream.ReadByte();
                if (c == -1)
                {
                    throw new FileFormatException("Unexpected EOF (isolated opening angle-bracket character).", stream.Position);
                }
                // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
                if (c == '<')
                {
                    tokenType = TokenTypeEnum.DictionaryBegin;
                    break;
                }

                // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
                tokenType = TokenTypeEnum.Hex;

                // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                buffer = new StringBuilder();
                while (c != '>') // NOT string end.
                {
                    buffer.Append((char)c);

                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        throw new FileFormatException("Unexpected EOF (malformed hex string).", stream.Position);
                    }
                }

                break;

            case '>': // Dictionary (end).
                // A closing angle bracket is valid here only as the first half of '>>'.
                c = stream.ReadByte();
                if (c != '>')
                {
                    throw new FileFormatException("Malformed dictionary.", stream.Position);
                }

                tokenType = TokenTypeEnum.DictionaryEnd;

                break;

            case '(': // Literal string.
                tokenType = TokenTypeEnum.Literal;

                buffer = new StringBuilder();
                // Nesting depth of unescaped parentheses (-1 means the string has been closed).
                int level = 0;
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                    if (c == '(')
                    {
                        level++;
                    }
                    else if (c == ')')
                    {
                        level--;
                    }
                    else if (c == '\\')
                    {
                        // Escape sequence: the character is replaced by what it denotes.
                        bool lineBreak = false;
                        c = stream.ReadByte();
                        switch (c)
                        {
                        case 'n':
                            c = '\n';
                            break;

                        case 'r':
                            c = '\r';
                            break;

                        case 't':
                            c = '\t';
                            break;

                        case 'b':
                            c = '\b';
                            break;

                        case 'f':
                            c = '\f';
                            break;

                        case '(':
                        case ')':
                        case '\\':
                            // Escaped delimiter/backslash: taken literally (no nesting change).
                            break;

                        case '\r':
                            // Backslash + EOL: line continuation (nothing is appended).
                            lineBreak = true;
                            c         = stream.ReadByte();
                            if (c != '\n')
                            {
                                stream.Skip(-1);
                            }
                            break;

                        case '\n':
                            lineBreak = true;
                            break;

                        default:
                        {
                            // Is it outside the octal encoding?
                            if (c < '0' || c > '7')
                            {
                                break;
                            }

                            // Octal [PDF:1.6:3.2.3] (1 to 3 digits).
                            int octal = c - '0';
                            c = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = stream.ReadByte();
                            // Octal end?
                            if (c < '0' || c > '7')
                            {
                                c = octal; stream.Skip(-1); break;
                            }
                            octal = (octal << 3) + c - '0';
                            c     = octal & 0xff; // High-order overflow is dropped.
                            break;
                        }
                        }
                        if (lineBreak)
                        {
                            continue;
                        }
                        if (c == -1)
                        {
                            break;
                        }
                    }
                    else if (c == '\r')
                    {
                        // Unescaped EOL (CR or CR+LF) is normalized to a single LF.
                        c = stream.ReadByte();
                        if (c == -1)
                        {
                            break;
                        }
                        if (c != '\n')
                        {
                            c = '\n'; stream.Skip(-1);
                        }
                    }
                    if (level == -1)
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }
                if (c == -1)
                {
                    throw new FileFormatException("Malformed literal string.", stream.Position);
                }

                break;

            case '%': // Comment.
                tokenType = TokenTypeEnum.Comment;

                // The comment extends up to the end of the line (or of the stream).
                buffer = new StringBuilder();
                while (true)
                {
                    c = stream.ReadByte();
                    if (c == -1 ||
                        IsEOL(c))
                    {
                        break;
                    }

                    buffer.Append((char)c);
                }

                break;

            default: // Keyword.
                tokenType = TokenTypeEnum.Keyword;

                buffer = new StringBuilder();
                do
                {
                    buffer.Append((char)c);
                    c = stream.ReadByte();
                    if (c == -1)
                    {
                        break;
                    }
                } while(!IsDelimiter(c) && !IsWhitespace(c));
                // Recover the first byte after the current token (none was read on end of stream:
                // stepping back then would re-expose the token's own last byte).
                if (c != -1)
                {
                    stream.Skip(-1);
                }

                break;
            }

            if (buffer != null)
            {
                /*
                 * Here we prepare the current token state.
                 */
                // Which token type?
                switch (tokenType)
                {
                case TokenTypeEnum.Keyword:
                    token = buffer.ToString();
                    // Late recognition.
                    switch ((string)token)
                    {
                    case Keyword.False:
                    case Keyword.True: // Boolean.
                        tokenType = TokenTypeEnum.Boolean;
                        token     = bool.Parse((string)token);
                        break;

                    case Keyword.Null: // Null.
                        tokenType = TokenTypeEnum.Null;
                        token     = null;
                        break;
                    }
                    break;

                case TokenTypeEnum.Comment:
                case TokenTypeEnum.Hex:
                case TokenTypeEnum.Name:
                    token = buffer.ToString();
                    break;

                case TokenTypeEnum.Literal:
                    token = buffer.ToString();
                    // Late recognition.
                    // NOTE: Ordinal comparison, as the prefix is a syntactic marker (a culture-sensitive
                    // match could ignore leading characters the current culture deems ignorable).
                    if (((string)token).StartsWith("D:", StringComparison.Ordinal)) // Date.
                    {
                        tokenType = TokenTypeEnum.Date;
                        token     = PdfDate.ToDate((string)token);
                    }
                    break;

                case TokenTypeEnum.Integer:
                    // NOTE(review): a lone sign or an out-of-range value makes Int32.Parse throw -- confirm
                    // whether callers expect that.
                    token = Int32.Parse(
                        buffer.ToString(),
                        NumberStyles.Integer,
                        StandardNumberFormatInfo
                        );
                    break;

                case TokenTypeEnum.Real:
                    // [FIX:1668410] Parsing of float numbers was buggy (localized default number format).
                    token = Single.Parse(
                        buffer.ToString(),
                        NumberStyles.Float,
                        StandardNumberFormatInfo
                        );
                    break;
                }
            }

            return(true);
        }
Exemplo n.º 7
0
        /**
         * <summary>Parses an inline image: its header entries (up to the image-data keyword),
         * followed by its raw data (up to the 'EI' end-image operator).</summary>
         * <remarks>The end-image operator is recognized as a whitespace character followed by
         * 'EI', in turn followed by either a whitespace character or the end of the stream.
         * The whitespace character preceding 'EI' is not part of the image data.</remarks>
         * <returns>The parsed inline image.</returns>
         * <exception cref="PostScriptParseException">Thrown if the stream ends before a complete
         * end-image operator is found.</exception>
         */
        private InlineImage ParseInlineImage(
            )
        {
            /*
             * NOTE: Inline images use a peculiar syntax that's an exception to the usual rule
             * that the data in a content stream is interpreted according to the standard PDF syntax
             * for objects.
             */
            InlineImageHeader header;
            {
                List <PdfDirectObject> operands = new List <PdfDirectObject>();
                // Parsing the image entries...
                while (MoveNext() &&
                       TokenType != TokenTypeEnum.Keyword) // Not keyword (i.e. end at image data beginning (ID operator)).
                {
                    operands.Add((PdfDirectObject)ParsePdfObject());
                }
                header = new InlineImageHeader(operands);
            }

            InlineImageBody body;

            {
                // [FIX:51,74] Wrong 'EI' token handling on inline image parsing.
                bytes::IInputStream stream = Stream;
                stream.ReadByte(); // Should be the whitespace following the 'ID' token.
                bytes::Buffer data = new bytes::Buffer();
                /*
                 * NOTE: Bytes that may belong to the end image operator (whitespace, 'E', 'I') are
                 * held back here until the candidate is either completed or disproved; in the
                 * latter case they are flushed to the image data.
                 */
                byte[] endChunk       = new byte[3];
                int    endChunkLength = 0;
                while (true)
                {
                    int curByte = stream.ReadByte();
                    if (curByte == -1)
                    {
                        if (endChunkLength == 3)
                        {
                            /*
                             * NOTE: The stream ends right after a complete end image operator.
                             */
                            break;
                        }
                        throw new PostScriptParseException("No 'EI' token found to close inline image data stream.");
                    }

                    if (endChunkLength == 3 && IsWhitespace(curByte))
                    {
                        /*
                         * NOTE: The whitespace character after the end image operator completes the pattern.
                         */
                        break;
                    }

                    if ((endChunkLength == 1 && curByte == 'E') ||
                        (endChunkLength == 2 && curByte == 'I'))
                    {
                        /*
                         * NOTE: End image operator characters.
                         */
                        endChunk[endChunkLength++] = (byte)curByte;
                        continue;
                    }

                    /*
                     * NOTE: The current byte doesn't extend the candidate (if any): whatever was held
                     * back is plain image data.
                     */
                    for (int index = 0; index < endChunkLength; index++)
                    {
                        data.Append(endChunk[index]);
                    }
                    endChunkLength = 0;

                    if (IsWhitespace(curByte))
                    {
                        /*
                         * NOTE: Any whitespace character (including one that has just disproved a
                         * previous candidate) may announce the beginning of the end image operator.
                         */
                        endChunk[0]    = (byte)curByte;
                        endChunkLength = 1;
                    }
                    else
                    {
                        data.Append((byte)curByte);
                    }
                }
                body = new InlineImageBody(data);
            }

            return(new InlineImage(
                       header,
                       body
                       ));
        }