예제 #1
0
        /// <summary>
        /// Parses the body of a PDF hex string with fail-fast semantics: parsing of digits stops as soon as
        /// a character that is not allowed inside a hex string is found.
        /// This is necessary in order to detect malformed input and be able to skip to the next object start.
        /// The opening '&lt;' is assumed to have been consumed by the caller.
        /// </summary>
        /// <param name="reader">Source positioned just after the opening '&lt;'.</param>
        /// <returns>The result of <c>CosString.ParseHex</c> applied to the hex digits collected so far.</returns>
        /// <exception cref="IOException">The end of the stream was reached before the closing '&gt;'.</exception>
        private static CosString ParseHexString(IRandomAccessRead reader)
        {
            const string missingClosingBracket = "Missing closing bracket for hex string. Reached EOS.";

            var hexDigits = new StringBuilder();

            while (true)
            {
                int c = reader.Read();

                // Test for end of stream before any narrowing cast: converting -1 to char
                // only works in an unchecked context and would overflow in a checked one.
                if (c < 0)
                {
                    throw new IOException(missingClosingBracket);
                }

                if (c == '>')
                {
                    // regular end of the hex string
                    break;
                }

                if (ReadHelper.IsHexDigit((char)c))
                {
                    hexDigits.Append((char)c);
                    continue;
                }

                if (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\b' || c == '\f')
                {
                    // these characters are tolerated between hex digits and simply skipped
                    continue;
                }

                // An invalid character was found: discard the last hex digit
                // if it is not part of a complete pair.
                if (hexDigits.Length % 2 != 0)
                {
                    hexDigits.Remove(hexDigits.Length - 1, 1);
                }

                // Skip everything up to and including the closing bracket.
                do
                {
                    c = reader.Read();
                }
                while (c != '>' && c >= 0);

                // The end of the stream may be reached while looking for the closing bracket.
                // This can happen for malformed PDFs only; fail instead of looping forever.
                if (c < 0)
                {
                    throw new IOException(missingClosingBracket);
                }

                break;
            }

            return CosString.ParseHex(hexDigits.ToString());
        }
예제 #2
0
        /// <summary>
        /// Reads a PDF name from <paramref name="reader"/>. The leading '/' is consumed first, then bytes are
        /// collected until <c>ReadHelper.IsEndOfName</c> reports a terminator or the input ends.
        /// A '#' followed by two hex digits is decoded into the single byte those digits denote.
        /// The terminating character, if any, is pushed back onto the reader.
        /// </summary>
        /// <param name="reader">Source positioned at the '/' that starts the name.</param>
        /// <returns>
        /// The name created from the collected bytes, decoded as UTF-8 when <c>ReadHelper.IsValidUtf8</c>
        /// accepts them and as windows-1252 otherwise.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        /// <exception cref="IOException">A '#' escape could not be converted from hex.</exception>
        public CosName Parse([NotNull] IRandomAccessRead reader)
        {
            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            ReadHelper.ReadExpectedChar(reader, '/');

            using (var buffer = new MemoryStream())
            {
                int c = reader.Read();
                while (c != -1)
                {
                    byte ch = (byte)c;
                    if (ch == '#')
                    {
                        int ch1 = reader.Read();
                        int ch2 = reader.Read();

                        // Check for premature EOF before casting either value to char.
                        // The pending '#' (and ch1, if it was read) is dropped in this case.
                        if (ch1 == -1 || ch2 == -1)
                        {
                            //LOG.error("Premature EOF in BaseParser#parseCosName");
                            c = -1;
                            break;
                        }

                        // Prior to PDF v1.2, the # was not a special character.  Also,
                        // it has been observed that various PDF tools do not follow the
                        // spec with respect to the # escape, even though they report
                        // PDF versions of 1.2 or later.  The solution here is that we
                        // interpret the # as an escape only when it is followed by two
                        // valid hex digits.
                        if (ReadHelper.IsHexDigit((char)ch1) && ReadHelper.IsHexDigit((char)ch2))
                        {
                            string hex = "" + (char)ch1 + (char)ch2;
                            try
                            {
                                buffer.WriteByte((byte)Convert.ToInt32(hex, 16));
                            }
                            catch (FormatException e)
                            {
                                throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
                            }

                            c = reader.Read();
                        }
                        else
                        {
                            // Not an escape: keep the '#' literally, continue with ch1 as the
                            // current character and hand ch2 back to the reader.
                            reader.Unread(ch2);
                            c = ch1;
                            buffer.WriteByte(ch);
                        }
                    }
                    else if (ReadHelper.IsEndOfName(ch))
                    {
                        break;
                    }
                    else
                    {
                        buffer.WriteByte(ch);
                        c = reader.Read();
                    }
                }

                // Push the terminator back so the caller can process it.
                if (c != -1)
                {
                    reader.Unread(c);
                }

                // Copy the collected bytes once and reuse the array for both validation and decoding.
                byte[] bytes = buffer.ToArray();

                // NOTE(review): on .NET Core / .NET 5+ "windows-1252" is only available after
                // registering CodePagesEncodingProvider - confirm the host application does so.
                string name = ReadHelper.IsValidUtf8(bytes)
                    ? Encoding.UTF8.GetString(bytes)
                    : Encoding.GetEncoding("windows-1252").GetString(bytes);

                return CosName.Create(name);
            }
        }