Example #1
0
        /// <summary>
        /// Reads a <c>startxref</c> section when the stream is positioned at one: the keyword
        /// line, the numeric value that follows it (stored in <c>m_xrefLocation</c>), and then
        /// every line up to and including the <c>%%EOF</c> marker.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// <c>true</c> when the peeked line was <c>startxref</c> and the section was read;
        /// <c>false</c> otherwise.
        /// </returns>
        /// <exception cref="ParseException">
        /// The value after the keyword could not be matched or parsed as an integer.
        /// </exception>
        public bool ReadStartXref(PdfStream st)
        {
            if (st.PeekLine(true, true) != "startxref")
            {
                return(false);
            }

            Utility.TraceLine("Read startxref");
            st.ReadLine();

            try
            {
                string sline = st.RegexMatch("^([0-9]+)");
                m_xrefLocation = Int32.Parse(sline);
            }
            catch (Exception except)
            {
                throw new ParseException("Error reading xref inset: " + except.Message);
            }

            // Skip forward to the end-of-file marker. The end-of-stream check stops the scan
            // on a truncated file that never contains %%EOF instead of reading forever.
            string line = st.ReadLine(true, true);
            while ((line != "%%EOF") && !st.Eof)
            {
                line = st.ReadLine(true, true);
            }

            if (line != "%%EOF")
            {
                Utility.TraceLine("Reached end of stream without finding the %%EOF marker");
            }

            return(true);
        }
Example #2
0
        /// <summary>
        /// Reads an <c>xref</c> section when the stream is positioned at one. Each block in
        /// the section starts with a "start count" line and is followed by <c>count</c> entry
        /// lines. For in-use entries (while <c>m_linflag</c> is not set) the recorded byte
        /// offset is compared with the offset at which the object was actually read; a
        /// mismatch clears <c>m_pdfa</c>.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// <c>true</c> when the peeked line was <c>xref</c> and the section was read;
        /// <c>false</c> otherwise. This matches the convention used by ReadStartXref and
        /// ReadTrailer.
        /// </returns>
        /// <exception cref="ParseException">
        /// Any failure while reading a block header or one of its entry lines.
        /// </exception>
        public bool ReadXref(PdfStream st)
        {
            if (st.PeekLine(true, true) != "xref")
            {
                return(false);
            }

            Utility.TraceLine("Reading a XREF");
            st.ReadLine();

            // NOTE(review): the second RegexMatch argument looks like a "consume the match"
            // flag (false here, true when parsing below) - confirm against PdfStream.
            while (st.RegexMatch("^([0-9]+ [0-9]+)", false) != "")
            {
                Utility.TraceLine("Found XREF value block");

                try
                {
                    // Each xref block starts with a line with the starting number of the objects in the
                    // block, and then a count of the number of objects in the block PDF 1.5 p70
                    int startat = Int32.Parse(st.RegexMatch("^([0-9]+)", true));
                    int count   = Int32.Parse(st.RegexMatch("^[ \t]*([0-9]+)", true));
                    st.ReadLine();

                    // Then you get one line per object in the block. They contain:
                    // <byte offset> <generation> <n = inuse, f = free>
                    Utility.TraceLine("XREF reading " + count + " lines");
                    for (int i = 0; i < count; i++)
                    {
                        // Fixed-width entry: offset in columns 0-9, generation in columns
                        // 11-15 and the in-use flag in column 17.
                        string line       = st.ReadLine();
                        long   offset     = Int64.Parse(line.Substring(0, 10));
                        int    generation = Int32.Parse(line.Substring(11, 5));
                        string inuse      = line.Substring(17, 1);

                        Utility.TraceLine("XREF line: " + offset + " " + generation + " " + inuse);

                        if ((inuse == "n") && !m_linflag)
                        {
                            // PDF/A requires us to verify these offsets
                            // NOTE(review): if m_objects.Get can return null for an unknown
                            // object, the resulting failure is reported through the catch
                            // below as a ParseException - confirm that is the intent.
                            Object obj = m_objects.Get(startat + i, generation);
                            if (offset != obj.StartedAt)
                            {
                                m_pdfa = false;
                                Utility.TraceLine("PDF/A: Object offset is incorrect " + offset + " != " +
                                                  obj.StartedAt + " (required by section 5.4)");
                            }
                        }
                    }
                }
                catch (Exception except)
                {
                    throw new ParseException("Error reading xref block starter: " + except.Message);
                }
            }

            // An xref section was found and consumed. The original returned false on every
            // path, which made the result useless to callers.
            return(true);
        }
Example #3
0
        /// <summary>
        /// Reads a <c>stream</c> ... <c>endstream</c> section when the stream is positioned at
        /// one, appending each content line (as bytes) to <c>m_stream</c>. Does nothing when
        /// the next line is not <c>stream</c>.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        private void ReadStream(PdfStream st)
        {
            if (st.PeekLine() != "stream")
            {
                return;
            }

            Utility.TraceLine("Read stream: ");
            st.ReadLine();

            // Stream content is arbitrary data, so the terminator is matched ordinally;
            // a culture-sensitive EndsWith can ignore or fold characters in such data.
            while (!st.PeekLine(true, true).EndsWith("endstream", System.StringComparison.Ordinal) && !st.Eof)
            {
                Utility.Trace("-");
                m_stream.Append(st.ReadLineAsBytes());
            }
            Utility.TraceLine(" Done");

            // Consume the line carrying the endstream keyword.
            st.ReadLine();

            // Sometimes there is a blank line after the endstream
            st.ConsumeWhitespace();
        }
Example #4
0
        /// <summary>
        /// Reads a comment line when the stream is positioned at a <c>%</c>.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// The result of <c>st.ReadLine()</c> when <c>st.Expect("%", false)</c> succeeds;
        /// otherwise the empty string.
        /// </returns>
        public string ReadComment(PdfStream st)
        {
            // NOTE(review): the false argument presumably keeps Expect from throwing and/or
            // consuming the '%' - confirm against PdfStream.Expect.
            bool atComment = st.Expect("%", false);
            return atComment ? st.ReadLine() : "";
        }
Example #5
0
 /// <summary>
 /// When the object has no dictionary entries, accumulates every line up to (but not
 /// including) the <c>endobj</c> line into <c>m_directValue</c>. Otherwise only traces
 /// that the object is not eligible.
 /// </summary>
 /// <param name="st">Stream to read from.</param>
 private void ReadDirectValue(PdfStream st)
 {
     if (m_dictionary.Count != 0)
     {
         Utility.TraceLine("Not eligible for direct value");
         return;
     }

     // The end-of-stream check stops the scan on a truncated object that never reaches
     // "endobj"; without it the loop would never terminate.
     while ((st.PeekLine() != "endobj") && !st.Eof)
     {
         m_directValue += st.ReadLine();
     }
 }
Example #6
0
        /// <summary>
        /// Reads the file header: the <c>%PDF-</c> marker, the three-character version
        /// (stored in <c>m_version</c>) and the comment line that follows. The comment is
        /// checked against the PDF/A requirement; each violation clears <c>m_pdfa</c> and is
        /// traced.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <exception cref="ParseException">
        /// Any failure while reading the header (missing marker, unparsable version, ...).
        /// </exception>
        public void ReadHeader(PdfStream st)
        {
            try
            {
                st.Expect("%PDF-", true);

                // The version always uses '.' as the decimal separator, so it is parsed with
                // the invariant culture; the current culture could misread "1.4".
                m_version = float.Parse(st.ReadBlock(3), System.Globalization.CultureInfo.InvariantCulture);
                st.ReadLine();

                // PDF/A requires a four character comment using only characters with a value
                // greater than 127 (PDF/A ISO Specification 5.2)
                string comment = ReadComment(st);
                if (comment == "")
                {
                    m_pdfa = false;
                    Utility.TraceLine("PDF/A: No section 5.2 comment line");
                }
                else if (comment.Length != 5)
                {
                    // NOTE(review): a length of 5 presumably means the leading '%' plus the
                    // four required characters - confirm that ReadComment returns the '%'.
                    m_pdfa = false;
                    Utility.TraceLine("PDF/A: Section 5.2 comment is wrong size");
                }
                else
                {
                    // Check positions 1..4 (position 0 is skipped). The bound is the string
                    // length; the previous bound of 6 indexed one past the end of the
                    // five-character comment and threw for every correctly sized comment.
                    for (int count = 1; count < comment.Length; count++)
                    {
                        if (comment[count] < 128)
                        {
                            m_pdfa = false;
                            Utility.TraceLine("PDF/A: Section 5.2 comment character " + count + " is not binary");
                        }
                    }
                }

                Utility.TraceLine("Read header");
            }
            catch (Exception except)
            {
                throw new ParseException("No PDF header found: " + except.Message);
            }
        }
Example #7
0
        /// <summary>
        /// Parses the body of this object: its dictionary, an optional stream, an optional
        /// direct value, and finally the closing <c>endobj</c> line.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <exception cref="RuntimeException">
        /// A ParseException was raised while parsing; a crash dump is produced via
        /// <c>Utility.CrashDump</c> before this exception is thrown.
        /// </exception>
        public void Parse(PdfStream st)
        {
            Utility.TraceLine("Read object parse pass for object " + Number + " " + Generation);

            try
            {
                m_dictionary.ReadDictionary(st);
                ReadStream(st);
                ReadDirectValue(st);

                bool atEndOfObject = st.PeekLine(true, true) == "endobj";
                if (!atEndOfObject)
                {
                    // Include the unexpected line in the message to aid diagnosis.
                    string prefix = "Object " + Number + " " + Generation + " ended early: \"";
                    throw new ParseException(prefix + st.PeekLine(true, true) + "\"");
                }

                // Consume the endobj line itself.
                st.ReadLine();
            }
            catch (ParseException parseError)
            {
                Utility.CrashDump(m_parseStartedAt, st.Position, st);

                string detail = "Error processing object: " + parseError.Message +
                                " for object " + Number + " " + Generation;
                throw new RuntimeException(detail);
            }
        }
Example #8
0
        /// <summary>
        /// Reads a <c>trailer</c> section when the stream is positioned at one, loading its
        /// dictionary into <c>m_trailer</c> and checking it against the PDF/A trailer rules.
        /// Each violation clears <c>m_pdfa</c> and is traced.
        /// </summary>
        /// <param name="st">Stream to read from.</param>
        /// <returns>
        /// <c>true</c> when the peeked line was <c>trailer</c> and the section was read;
        /// <c>false</c> otherwise.
        /// </returns>
        public bool ReadTrailer(PdfStream st)
        {
            if (st.PeekLine() != "trailer")
            {
                return false;
            }

            st.ReadLine();
            m_trailer.ReadDictionary(st);

            // PDF/A trailer rules (section 5.3 of the draft standard). None of the checks
            // short-circuits the others, so that every violation is reported in one pass.
            // NOTE(review): these checks treat a missing entry as null, whereas the Pdf
            // constructor tests the item's Valid property - confirm which contract
            // m_trailer.Get follows.

            // Entries that must NOT be present.
            if (m_trailer.Get("Encrypt") != null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Forbidden encrypt dictionary item in section 5.3 document trailer");
            }

            if (m_trailer.Get("Info") != null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Forbidden info dictionary item in section 5.3 document trailer");
            }

            // Entries that MUST be present.
            if (m_trailer.Get("Size") == null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Required size dictionary document trailer item is missing");
            }

            if (m_trailer.Get("ID") == null)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Required ID dictionary document trailer item is missing");
            }

            Utility.TraceLine("Read trailer");
            return true;
        }
Example #9
0
        /// <summary>
        /// Loads the named file into a PdfStream and parses it: the header first, then
        /// repeated passes that each try to read a comment, an object, an xref section, a
        /// trailer and a startxref section, until the final trailer has been read or a pass
        /// fails to advance the stream. Content left after parsing clears <c>m_pdfa</c>.
        /// </summary>
        /// <param name="filename">Path of the file to parse.</param>
        public Pdf(string filename)
        {
            PdfStream st = new PdfStream();
            st.FillFromFile(filename);

            long pos = st.Position;
            ReadHeader(st);

            // Keep going for as long as the previous pass moved the stream position.
            bool eof = false;
            while (!eof && (st.Position != pos))
            {
                Utility.TraceLine("Starting read pass");
                pos = st.Position;
                ReadComment(st);

                Object obj = ReadObject(st);
                if (obj.Valid)
                {
                    m_objects.Add(obj);

                    // Only the first object in the file is inspected for the Linearized key.
                    if (m_objects.Count == 1)
                    {
                        // NOTE(review): ReadTrailer compares dictionary lookups with null,
                        // while this code relies on Valid - confirm Get never returns null.
                        DictionaryItem di = obj.Dictionary.Get("Linearized");
                        if (di.Valid)
                        {
                            if (di.Type != DictionaryItem.ValueType.Number)
                            {
                                throw new ParseException("Linearized dictionary item is not a number");
                            }

                            if (di.ValueAsInteger() == 1)
                            {
                                m_linflag    = true;
                                m_linearized = true;
                                Utility.TraceLine("Linearized PDF document found");
                                Utility.TraceLine("PDF/A: Linearization of document is ignored (section 5.10)");
                            }
                        }
                    }
                }

                ReadXref(st);

                bool trailerRead = ReadTrailer(st);
                if (trailerRead)
                {
                    if (m_linflag)
                    {
                        // First trailer after the linearization flag was set: clear the
                        // flag and keep parsing rather than treating this as the end.
                        m_linflag = false;
                        Utility.TraceLine("Linearization trailer");
                    }
                    else
                    {
                        eof = true;
                    }
                }

                ReadStartXref(st);

                // Nothing in this pass consumed any input: read one line before the
                // loop condition is evaluated again.
                bool stalled = !eof && (st.Position == pos);
                if (stalled)
                {
                    st.ReadLine();
                }
            }

            if (st.Position != st.Length)
            {
                m_pdfa = false;
                Utility.TraceLine("PDF/A: Extraneous content after EOF marker breaches section 5.3 requirements");
            }
        }